Esempio n. 1
0
/*
 * Driver for the read/genome Bloom-filter experiment.
 *
 * Steps, in order:
 *   1. build the "unique" and "repeat" tries from the reads,
 *   2. hash the unique-reads trie into a Bloom filter,
 *   3. query the filter with the genome reference and collect the accepted
 *      windows in trie_genome_unique,
 *   4. derive the false-positive and false-negative tries.
 *
 * argv[3] is passed to every hattrie_iteration() call together with a
 * per-trie label. line_number, size and buffer are not declared in this
 * function; they are defined elsewhere in the file.
 *
 * Returns 0.
 */
int main(int argc, char *argv[]) {
    FILE *genome_f;
    int read_size = 0; //the size of the reads
    int table_factor = 10; //arbitrary
    int num_of_reads;
    int bf_table_size; //computed below, once num_of_reads is known
    int num_of_hash_func;

    BloomFilter* bf_unique; //BF for the unique tries
    hattrie_t* trie_unique; //hattrie that holds the unique reads
    hattrie_t* trie_repeat; //hattrie that holds the repetitive reads, and the ones that have N inside of them
    hattrie_t* trie_genome_unique; //'accepts': everything the unique BF says is in the genome
    hattrie_t* trie_fp; //trie that holds the false positives set
    hattrie_t* trie_fn; //trie that holds the false negatives set

    trie_repeat = hattrie_create();
    trie_unique = hattrie_create();
    // NOTE(review): (argv[1], "r") is a comma expression, so the first
    // argument actually passed is the string "r" and argv[1] is discarded.
    // This looks like the remains of fopen(argv[1], "r") - confirm against
    // the prototype of make_repeat_and_unique_tries.
    make_repeat_and_unique_tries((argv[1], "r"), trie_unique, trie_repeat);

    // Sizing is done only here: num_of_reads depends on line_number, and the
    // table size depends on num_of_reads.
    num_of_reads = line_number/2;
    bf_table_size = table_factor*num_of_reads;
    // 0.69314 ~ ln(2); the usual Bloom-filter choice k = (m/n) * ln 2.
    num_of_hash_func = (int) ceil(table_factor*0.69314);

    //print the keys of the unique and repeat tries
    hattrie_iteration(trie_unique, "unique", argv[3]);
    hattrie_iteration(trie_repeat, "repeat", argv[3]);

    //hashing unique reads trie using bloom filter
    bf_unique = bloom_filter_new(bf_table_size, string_hash, num_of_hash_func);
    hash_trie_into_bf(trie_unique, bf_unique);
    check_if_trie_in_bf(trie_unique, bf_unique);

    //create trie for all of the sliding windows in the genome reference
    //which are in the unique reads according to bf_unique
    read_size = size-2;
    trie_genome_unique = hattrie_create();
    // NOTE(review): genome_f is never assigned in this function, so it is
    // used uninitialized here and in fclose() below. The fopen() call that
    // should precede this appears to be missing - confirm which argument
    // names the genome file before adding it.
    query_bf_with_genome(bf_unique, genome_f ,trie_genome_unique, read_size);
    // NOTE(review): same arguments as the check above; verify whether a
    // different trie was intended here.
    check_if_trie_in_bf(trie_unique, bf_unique);
    fclose(genome_f);
    hattrie_iteration(trie_genome_unique, "genome_unique", argv[3]);
    trie_fp = hattrie_create();
    trie_fn = hattrie_create();

    printf("start checking for false positive \n");
    check_fp(trie_unique,trie_genome_unique, trie_fp);
    hattrie_iteration(trie_fp, "fp_unique", argv[3]);

    printf("start checking for false negative \n");
    check_fn(trie_unique,trie_genome_unique, trie_fn);
    hattrie_iteration(trie_fn, "fn_unique", argv[3]);

    bloom_filter_free(bf_unique);
    free(buffer);
    hattrie_free(trie_unique);
    hattrie_free(trie_repeat);
    hattrie_free(trie_genome_unique);
    hattrie_free(trie_fn);
    hattrie_free(trie_fp);
    return 0;
}
Esempio n. 2
0
/**
 * Ad-hoc command-line spell checker.
 *
 * Loads every line of the file "dictionary" into a bloom filter, then reads
 * text from stdin, splits it into words and prints each word the filter
 * does not contain.
 *
 * Returns 1 if the dictionary cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    /* Characters that separate words in the text read from stdin. */
    static const char separators[] = " \t,.;:\r\n?!-/()";
    char line[1024];
    bloom_t *filter;
    char *word;

    FILE *fp = fopen("dictionary", "r");
    if (fp == NULL) {
        fprintf(stderr, "E: Couldn't open words file\n");
        fflush (stderr);
        return 1;
    }

    /* Fill the filter with one dictionary entry per input line. */
    filter = bloom_filter_new(2500000);
    while (fgets(line, sizeof line, fp) != NULL) {
        strip(line);
        bloom_filter_add(filter, line);
    }
    fclose(fp);

    printf("bloom filter count : %u\n", bloom_filter_count(filter));
    printf("bloom filter size  : %u\n", bloom_filter_size(filter));

    /* Tokenize stdin line by line; report every word the filter rejects. */
    while (fgets(line, sizeof line, stdin) != NULL) {
        strip(line);
        for (word = strtok(line, separators);
             word != NULL;
             word = strtok(NULL, separators)) {
            if (bloom_filter_contains(filter, word)) {
                continue;
            }
            printf("%s\n", word);
        }
    }

    bloom_filter_free(filter);
    return 0;
}
Esempio n. 3
0
/*
 * Build a new bloom filter whose bit table is the bitwise AND of the tables
 * of filter1 and filter2.
 *
 * Both inputs must have the same table_size, num_functions and hash_func;
 * otherwise NULL is returned. NULL is also returned when the result filter
 * cannot be created.
 */
BloomFilter *bloom_filter_intersection(BloomFilter *filter1, 
                                       BloomFilter *filter2)
{
	BloomFilter *merged;
	unsigned int byte_count;
	unsigned int idx;
	int compatible;

	/* The two tables can only be combined bit-for-bit when the filters
	 * were built with identical parameters. */

	compatible = filter1->table_size == filter2->table_size
	          && filter1->num_functions == filter2->num_functions
	          && filter1->hash_func == filter2->hash_func;

	if (!compatible) {
		return NULL;
	}

	/* Allocate the output filter with those shared parameters. */

	merged = bloom_filter_new(filter1->table_size, 
	                          filter1->hash_func, 
	                          filter1->num_functions);

	if (!merged) {
		return NULL;
	}

	/* Bits are packed eight to a byte, so round the bit count up to a
	 * whole number of bytes. */

	byte_count = (filter1->table_size + 7) / 8;

	/* AND the two source tables together, one byte at a time. */

	idx = 0;

	while (idx < byte_count) {
		merged->table[idx] = filter1->table[idx] & filter2->table[idx];
		++idx;
	}

	return merged;
}
Esempio n. 4
0
/*
 * Server entry point.
 *
 * Sets up the hash table, the bloom filter and the "/tmp/fp.all.out" output
 * file, then multiplexes two channels with select():
 *   - a UDP socket on SERV_PORT+1, serviced by recv_fp();
 *   - a TCP listening socket on SERV_PORT; every accepted connection is
 *     handed to a forked child that runs recv_chunk().
 * A UDP socket (sockfdbloom) and address (fakeaddr, 10.0.0.1:BLOOM_PORT)
 * for the POX controller channel are prepared as well.
 *
 * filter, fpfile_all, fakeaddr and sockfdbloom are not declared in this
 * function; they are defined elsewhere in the file.
 *
 * Returns 1 if the output file cannot be opened; otherwise the select loop
 * never terminates.
 */
int main(int argc, char **argv){
	int					sockfd_fp, connfd, nready, maxfdp1, listenfd2;
	pid_t				childpid;
	struct sockaddr_in	servaddr, cliaddr;
	fd_set				rset;
	const int			on = 1;
	int					bufsize = 65535;	/* socket buffer size requested below */
	void				sig_chld(int);

	/* hash table init
	 * Contain all the fps, maybe should be a cache and read from disk
	 */
	hash_init(1024);
	/*
	 * Create a bloom filter, the size NEED TO tradeoff
	 */
	filter = bloom_filter_new(5000);

	if (!(fpfile_all = fopen("/tmp/fp.all.out", "w"))) {
		fprintf(stderr, "E: Couldn't open file for write all fp\n");
		fflush (stderr);
		/* main returns int: report the failure with a non-zero status */
		return 1;
	}

	/* Channel : fingerprint packet recv */
	sockfd_fp = get_udp_socket(SERV_PORT+1);
	Setsockopt(sockfd_fp, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(int));
	Setsockopt(sockfd_fp, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(int));

	/* Channel : other packet */
	listenfd2 = Socket(AF_INET, SOCK_STREAM, 0);

	bzero(&servaddr, sizeof(servaddr));
	servaddr.sin_family      = AF_INET;
	servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
	servaddr.sin_port        = htons(SERV_PORT);

	Setsockopt(listenfd2, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	Bind(listenfd2, (SA *) &servaddr, sizeof(servaddr));

	Listen(listenfd2, LISTENQ);

	// Channel to POX controller
	bzero(&fakeaddr, sizeof(fakeaddr));
	fakeaddr.sin_family = AF_INET;
	fakeaddr.sin_port = htons(BLOOM_PORT);
	Inet_pton(AF_INET, "10.0.0.1", &fakeaddr.sin_addr);

	sockfdbloom = Socket(AF_INET, SOCK_DGRAM, 0);

	Signal(SIGCHLD, sig_chld);	/* must call waitpid() */

	FD_ZERO(&rset);
	maxfdp1 = max(sockfd_fp, listenfd2);
	connfd = -1;
	for ( ; ; ) {
		/* select() overwrites rset, so both descriptors are re-armed
		 * on every iteration */
		FD_SET(sockfd_fp, &rset);
		FD_SET(listenfd2, &rset);
		if ( (nready = select(maxfdp1 + 1, &rset, NULL, NULL, NULL)) < 0) {
			if (errno == EINTR)
				continue;		/* back to for() */
			else
				err_sys("select error");
		}

		if (FD_ISSET(sockfd_fp, &rset)) {
			recv_fp(sockfd_fp);
		}

		if (FD_ISSET(listenfd2, &rset)) {
			/* accept() takes a socklen_t *, not an int * */
			socklen_t len = sizeof(cliaddr);
			connfd = Accept(listenfd2, (SA *) &cliaddr, &len);
			Setsockopt(connfd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(int));
			if ( (childpid = Fork()) == 0) {	/* child process TO recv file data */
				Close(listenfd2);	/* close listening socket */
				recv_chunk(connfd);	// LOOP 
				exit(0);
			}
			Close(connfd);			/* parent closes connected socket */
		}

	}
	// cannot get here 
}
Esempio n. 5
0
/* Smoke test: create a bloom filter (arguments 16, string_hash, 10) and
 * release it again straight away. */
static void test_bloom_filter (void)
{
  BloomFilter *bf = bloom_filter_new (16, string_hash, 10);

  bloom_filter_free (bf);
}
Esempio n. 6
0
int main(int argc, char **argv){
	int					sockfd_fp, sockfd_data, connfd, udpfd, nready, maxfdp1, listenfd2;
	char				mesg[MAXLINE];
	pid_t				childpid, workerpid;
	struct sockaddr_in	servaddr, cliaddr;
	fd_set				rset;
	const int			on = 1;
	int client_fds[FD_SETSIZE];
	void				sig_chld(int);
	char *dedu_method = NULL;

	//handle the options 
	int c;
	int opterr = 0;
	while ((c = getopt (argc, argv, "m:")) != -1){
		switch (c){
			case 'm':
        		dedu_method = optarg;
        		break;
      		case '?':
          		fprintf (stderr,"Unknown option character `\\x%x'.\n",optopt);
        		return 1;
      		default:
        		abort ();
      	}	
	}

	if(strcmp(dedu_method, "sdna") == 0){
		sdna_on = 1;
		bloom_on = 1;
	}else if(strcmp(dedu_method, "bloom") ==0){
		bloom_on = 1;
	}
		

	/*
	 * Create a bloom filter, the size NEED TO tradeoff
	 */
	if(bloom_on)
		filter = bloom_filter_new(BLOOM_FILTER_SIZE);
	/*
	if (!(fpfile_all = fopen("/tmp/fp.all.out", "w"))) {
        fprintf(stderr, "E: Couldn't open file for write all fp\n");
        fflush (stderr);
        return;
    }
	*/

	listenfd2 = Socket(AF_INET, SOCK_STREAM, 0);

	bzero(&servaddr, sizeof(servaddr));
	servaddr.sin_family      = AF_INET;
	servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
	servaddr.sin_port        = htons(SERV_PORT);

	Setsockopt(listenfd2, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	Bind(listenfd2, (SA *) &servaddr, sizeof(servaddr));

	Listen(listenfd2, LISTENQ);

	// Channel to POX controller
	bzero(&fakeaddr, sizeof(fakeaddr));
	fakeaddr.sin_family = AF_INET;
	fakeaddr.sin_port = htons(BLOOM_PORT);
	Inet_pton(AF_INET, "10.0.0.1", &fakeaddr.sin_addr);

	sockfdbloom = Socket(AF_INET, SOCK_DGRAM, 0);

	Signal(SIGCHLD, sig_chld);	/* must call waitpid() */

	FD_ZERO(&rset);
	maxfdp1 = listenfd2;
	connfd = -1;
	for ( ; ; ) {
		FD_SET(listenfd2, &rset);
		if ( (nready = select(maxfdp1 + 1, &rset, NULL, NULL, NULL)) < 0) {
			if (errno == EINTR)
				continue;		/* back to for() */
			else
				err_sys("select error");
		}

		if (FD_ISSET(listenfd2, &rset)) {
			int len = sizeof(cliaddr);
			connfd = Accept(listenfd2, (SA *) &cliaddr, &len);
			int a = 65535;
			Setsockopt(connfd, SOL_SOCKET, SO_RCVBUF, &a, sizeof(int));
			/* here use single process model 
			 * NOT USE MULTI PROCESS BY FORK, otherwise need IPC to maintain hash table
			 */
			recv_chunk(connfd);	// LOOP 
		}
		
	}
	// cannot get here 
}