nt main(int argc, char *argv[]) { FILE *genome_f; int read_size=0; //the size of the reads int table_factor; //arbitrary int num_of_reads; int bf_table_size = table_factor*num_of_reads; int num_of_hash_func; BloomFilter* bf_unique; //BF for the unique tries // char* output_label=(char *)malloc(50); //label name for the output files hattrie_t* trie_unique; //hattrie that holds the unique reads hattrie_t* trie_repeat; //hattrie that holds the repetetive reads, and the one that has N inside of them. hattrie_t* trie_genome_unique; //put 'accepts' (everything that uniqe BF says yes that it's in genome) into a trie hattrie_t* trie_fp; //triw that holds false negatives set hattrie_t* trie_fn;//trie that holds false positives set // f = fopen(argv[1], "r"); trie_repeat = hattrie_create(); trie_unique = hattrie_create(); make_repeat_and_unique_tries((argv[1], "r"), trie_unique, trie_repeat); table_factor = 10; //arbitrary num_of_reads = line_number/2; bf_table_size = table_factor*num_of_reads; num_of_hash_func = (int) ceil(table_factor*0.69314); //print the keys of the uniqe and repaet tries hattrie_iteration(trie_unique, "unique", argv[3]); hattrie_iteration(trie_repeat, "repeat", argv[3]); //hashing uniqe reads trie using bloom filter bf_unique = bloom_filter_new(bf_table_size, string_hash, num_of_hash_func); hash_trie_into_bf(trie_unique, bf_unique); check_if_trie_in_bf(trie_unique, bf_unique); //create trie for all of the sliding windows in the genome reference which are in the unique reads according to the bf_unique read_size = size-2; trie_genome_unique = hattrie_create(); query_bf_with_genome(bf_unique, genome_f ,trie_genome_unique, read_size); check_if_trie_in_bf(trie_unique, bf_unique); fclose(genome_f); hattrie_iteration(trie_genome_unique, "genome_unique", argv[3]); trie_fp = hattrie_create(); trie_fn = hattrie_create(); printf("start checking for false positive \n"); check_fp(trie_unique,trie_genome_unique, trie_fp); hattrie_iteration(trie_fp, "fp_unique", 
argv[3]); printf("start checking for false negative \n"); check_fn(trie_unique,trie_genome_unique, trie_fn); hattrie_iteration(trie_fn, "fn_unique", argv[3]); bloom_filter_free(bf_unique); free(buffer); hattrie_free(trie_unique); hattrie_free(trie_repeat); hattrie_free(trie_genome_unique); hattrie_free(trie_fn); hattrie_free(trie_fp); return 0; }
/** * Ad-hoc command-line spell checker */ int main(int argc, char *argv[]) { // Open the dictionary file FILE *fp; if (!(fp = fopen("dictionary", "r"))) { fprintf(stderr, "E: Couldn't open words file\n"); fflush (stderr); return 1; } // Create a bloom filter bloom_t *filter = bloom_filter_new(2500000); // Add all dictionary words to the filter char *p; char line[1024]; while (fgets(line, 1024, fp)) { strip(line); bloom_filter_add(filter, line); } fclose(fp); printf("bloom filter count : %u\n", bloom_filter_count(filter)); printf("bloom filter size : %u\n", bloom_filter_size(filter)); // Read words from stdin and print those words not in the bloom filter while (fgets(line, 1024, stdin)) { strip(line); p = strtok(line, " \t,.;:\r\n?!-/()"); while (p) { if (!bloom_filter_contains(filter, p)) { printf("%s\n", p); } p = strtok(NULL, " \t,.;:\r\n?!-/()"); } } // Cleanup bloom_filter_free(filter); return 0; }
/**
 * Build a new bloom filter whose bit table is the bitwise AND of the
 * tables of two existing filters.
 *
 * Both inputs must have the same table size, the same number of hash
 * functions and the same hash function; otherwise no filter is created.
 *
 * @param filter1  First input filter.
 * @param filter2  Second input filter.
 * @return         A newly created filter, or NULL if the inputs were
 *                 configured differently or the new filter could not be
 *                 created.
 */
BloomFilter *bloom_filter_intersection(BloomFilter *filter1,
                                       BloomFilter *filter2)
{
    BloomFilter *merged;
    unsigned int nbytes;
    unsigned int idx;

    /* Refuse to combine filters that were built with different settings. */
    if (filter1->table_size != filter2->table_size) {
        return NULL;
    }
    if (filter1->num_functions != filter2->num_functions) {
        return NULL;
    }
    if (filter1->hash_func != filter2->hash_func) {
        return NULL;
    }

    /* The result shares the configuration of the inputs. */
    merged = bloom_filter_new(filter1->table_size,
                              filter1->hash_func,
                              filter1->num_functions);
    if (merged == NULL) {
        return NULL;
    }

    /* Bits are packed eight to a byte; round the byte count up. */
    nbytes = (filter1->table_size + 7) / 8;

    /* AND the two tables together, one byte at a time. */
    idx = 0;
    while (idx < nbytes) {
        merged->table[idx] = filter1->table[idx] & filter2->table[idx];
        ++idx;
    }

    return merged;
}
int main(int argc, char **argv){ int sockfd_fp, sockfd_data, connfd, udpfd, nready, maxfdp1, listenfd2; char mesg[MAXLINE]; pid_t childpid, workerpid; struct sockaddr_in servaddr, cliaddr; fd_set rset; const int on = 1; int client_fds[FD_SETSIZE]; void sig_chld(int); /* hash table init * Contain all the fps, maybe should be a cache and read from disk */ hash_init(1024); /* * Create a bloom filter, the size NEED TO tradeoff */ filter = bloom_filter_new(5000); if (!(fpfile_all = fopen("/tmp/fp.all.out", "w"))) { fprintf(stderr, "E: Couldn't open file for write all fp\n"); fflush (stderr); return; } /* Channel : fingerprint packet recv */ sockfd_fp = get_udp_socket(SERV_PORT+1); int a = 65535; Setsockopt(sockfd_fp, SOL_SOCKET, SO_RCVBUF, &a, sizeof(int)); Setsockopt(sockfd_fp, SOL_SOCKET, SO_SNDBUF, &a, sizeof(int)); /* Channel : other packet */ listenfd2 = Socket(AF_INET, SOCK_STREAM, 0); bzero(&servaddr, sizeof(servaddr)); servaddr.sin_family = AF_INET; servaddr.sin_addr.s_addr = htonl(INADDR_ANY); servaddr.sin_port = htons(SERV_PORT); Setsockopt(listenfd2, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); Bind(listenfd2, (SA *) &servaddr, sizeof(servaddr)); Listen(listenfd2, LISTENQ); // Channel to POX controller bzero(&fakeaddr, sizeof(fakeaddr)); fakeaddr.sin_family = AF_INET; fakeaddr.sin_port = htons(BLOOM_PORT); Inet_pton(AF_INET, "10.0.0.1", &fakeaddr.sin_addr); sockfdbloom = Socket(AF_INET, SOCK_DGRAM, 0); Signal(SIGCHLD, sig_chld); /* must call waitpid() */ FD_ZERO(&rset); maxfdp1 = max(sockfd_fp, listenfd2); connfd = -1; int i, maxi = -1; for(i=0; i<FD_SETSIZE; i++) client_fds[i] = -1; for ( ; ; ) { FD_SET(sockfd_fp, &rset); FD_SET(listenfd2, &rset); if ( (nready = select(maxfdp1 + 1, &rset, NULL, NULL, NULL)) < 0) { if (errno == EINTR) continue; /* back to for() */ else err_sys("select error"); } if (FD_ISSET(sockfd_fp, &rset)) { recv_fp(sockfd_fp); } if (FD_ISSET(listenfd2, &rset)) { int len = sizeof(cliaddr); connfd = Accept(listenfd2, (SA *) &cliaddr, &len); 
int a = 65535; Setsockopt(connfd, SOL_SOCKET, SO_RCVBUF, &a, sizeof(int)); if ( (childpid = Fork()) == 0) { /* child process TO recv file data */ Close(listenfd2); /* close listening socket */ recv_chunk(connfd); // LOOP exit(0); } Close(connfd); /* parent closes connected socket */ } } // cannot get here }
/* Smoke test: create a bloom filter with a table size of 16, string_hash as
 * the hash function and 10 hash functions, then release it again. */
static void test_bloom_filter(void)
{
    BloomFilter *bf = bloom_filter_new(16, string_hash, 10);

    bloom_filter_free(bf);
}
int main(int argc, char **argv){ int sockfd_fp, sockfd_data, connfd, udpfd, nready, maxfdp1, listenfd2; char mesg[MAXLINE]; pid_t childpid, workerpid; struct sockaddr_in servaddr, cliaddr; fd_set rset; const int on = 1; int client_fds[FD_SETSIZE]; void sig_chld(int); char *dedu_method = NULL; //handle the options int c; int opterr = 0; while ((c = getopt (argc, argv, "m:")) != -1){ switch (c){ case 'm': dedu_method = optarg; break; case '?': fprintf (stderr,"Unknown option character `\\x%x'.\n",optopt); return 1; default: abort (); } } if(strcmp(dedu_method, "sdna") == 0){ sdna_on = 1; bloom_on = 1; }else if(strcmp(dedu_method, "bloom") ==0){ bloom_on = 1; } /* * Create a bloom filter, the size NEED TO tradeoff */ if(bloom_on) filter = bloom_filter_new(BLOOM_FILTER_SIZE); /* if (!(fpfile_all = fopen("/tmp/fp.all.out", "w"))) { fprintf(stderr, "E: Couldn't open file for write all fp\n"); fflush (stderr); return; } */ listenfd2 = Socket(AF_INET, SOCK_STREAM, 0); bzero(&servaddr, sizeof(servaddr)); servaddr.sin_family = AF_INET; servaddr.sin_addr.s_addr = htonl(INADDR_ANY); servaddr.sin_port = htons(SERV_PORT); Setsockopt(listenfd2, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); Bind(listenfd2, (SA *) &servaddr, sizeof(servaddr)); Listen(listenfd2, LISTENQ); // Channel to POX controller bzero(&fakeaddr, sizeof(fakeaddr)); fakeaddr.sin_family = AF_INET; fakeaddr.sin_port = htons(BLOOM_PORT); Inet_pton(AF_INET, "10.0.0.1", &fakeaddr.sin_addr); sockfdbloom = Socket(AF_INET, SOCK_DGRAM, 0); Signal(SIGCHLD, sig_chld); /* must call waitpid() */ FD_ZERO(&rset); maxfdp1 = listenfd2; connfd = -1; for ( ; ; ) { FD_SET(listenfd2, &rset); if ( (nready = select(maxfdp1 + 1, &rset, NULL, NULL, NULL)) < 0) { if (errno == EINTR) continue; /* back to for() */ else err_sys("select error"); } if (FD_ISSET(listenfd2, &rset)) { int len = sizeof(cliaddr); connfd = Accept(listenfd2, (SA *) &cliaddr, &len); int a = 65535; Setsockopt(connfd, SOL_SOCKET, SO_RCVBUF, &a, sizeof(int)); /* here use 
single process model * NOT USE MULTI PROCESS BY FORK, otherwise need IPC to maintain hash table */ recv_chunk(connfd); // LOOP } } // cannot get here }