示例#1
0
/*
 * Pipeline driver:
 *   1. split the reads into a "unique" and a "repeat" trie,
 *   2. hash the unique reads into a Bloom filter,
 *   3. query the filter with the genome reference and collect every accepted
 *      window in its own trie,
 *   4. derive the false-positive and false-negative sets.
 * Every trie is dumped through hattrie_iteration() using argv[3] as its last
 * argument.
 *
 * argv[1] is handed to make_repeat_and_unique_tries().
 * NOTE(review): argv[2] is assumed to name the genome reference file. The
 * original code never opened genome_f at all, so confirm this against the
 * intended command line.
 *
 * line_number, size and buffer are defined outside this function
 * (presumably filled in by the helpers called here -- verify).
 *
 * Returns 0 on success, EXIT_FAILURE on bad usage or an unreadable genome file.
 */
int main(int argc, char *argv[]) {
    FILE *genome_f;
    int read_size;        /* the size of the reads */
    int table_factor;     /* Bloom filter table slots per read (arbitrary) */
    int num_of_reads;
    int bf_table_size;    /* computed once num_of_reads is known */
    int num_of_hash_func;

    BloomFilter* bf_unique;        /* Bloom filter for the unique reads */
    hattrie_t* trie_unique;        /* holds the unique reads */
    hattrie_t* trie_repeat;        /* holds the repetitive reads and the ones containing N */
    hattrie_t* trie_genome_unique; /* every genome window the unique filter accepts */
    hattrie_t* trie_fp;            /* false-positive set */
    hattrie_t* trie_fn;            /* false-negative set */

    /* argv[1]..argv[3] are all dereferenced below */
    if (argc < 4) {
        fprintf(stderr, "Usage: %s <reads> <genome> <output_label>\n",
                argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    /* Open the genome up front so a bad path fails before any work is done. */
    genome_f = fopen(argv[2], "r");
    if (genome_f == NULL) {
        perror(argv[2]);
        return EXIT_FAILURE;
    }

    trie_repeat = hattrie_create();
    trie_unique = hattrie_create();
    /* The original passed the comma expression (argv[1], "r"), i.e. just "r". */
    make_repeat_and_unique_tries(argv[1], trie_unique, trie_repeat);

    table_factor = 10; /* arbitrary */
    num_of_reads = line_number/2;
    bf_table_size = table_factor*num_of_reads;
    /* 0.69314 ~= ln 2: the usual k = (m/n) * ln 2 choice of hash-function count */
    num_of_hash_func = (int) ceil(table_factor*0.69314);

    /* print the keys of the unique and repeat tries */
    hattrie_iteration(trie_unique, "unique", argv[3]);
    hattrie_iteration(trie_repeat, "repeat", argv[3]);

    /* hash the unique reads trie into the Bloom filter */
    bf_unique = bloom_filter_new(bf_table_size, string_hash, num_of_hash_func);
    hash_trie_into_bf(trie_unique, bf_unique);
    check_if_trie_in_bf(trie_unique, bf_unique);

    /* collect every sliding window of the genome reference that bf_unique accepts */
    read_size = size-2;
    trie_genome_unique = hattrie_create();
    query_bf_with_genome(bf_unique, genome_f ,trie_genome_unique, read_size);
    check_if_trie_in_bf(trie_unique, bf_unique);
    fclose(genome_f);
    hattrie_iteration(trie_genome_unique, "genome_unique", argv[3]);

    trie_fp = hattrie_create();
    trie_fn = hattrie_create();

    printf("start checking for false positive \n");
    check_fp(trie_unique,trie_genome_unique, trie_fp);
    hattrie_iteration(trie_fp, "fp_unique", argv[3]);

    printf("start checking for false negative \n");
    check_fn(trie_unique,trie_genome_unique, trie_fn);
    hattrie_iteration(trie_fn, "fn_unique", argv[3]);

    bloom_filter_free(bf_unique);
    free(buffer);
    hattrie_free(trie_unique);
    hattrie_free(trie_repeat);
    hattrie_free(trie_genome_unique);
    hattrie_free(trie_fn);
    hattrie_free(trie_fp);
    return 0;
}
/*
 * Iteration benchmark: fills a hat-trie with random strings, then times
 * repeated full traversals, first in unsorted and then in sorted order.
 * Both timings are reported on stderr.
 */
int main()
{
    const size_t key_count = 1000000; /* how many strings to insert */
    const size_t len_min   = 50;      /* shortest generated string */
    const size_t len_max   = 500;     /* exclusive upper bound on the length */
    const size_t passes    = 100;     /* full traversals per timed run */
    char key[501];

    hattrie_t* trie = hattrie_create();

    for (size_t k = 0; k < key_count; ++k) {
        const size_t len = len_min + rand() % (len_max - len_min);
        randstr(key, len);
        *hattrie_get(trie, key, len) = 1;
    }

    /* One timed run per traversal order; the unsorted run goes first. */
    const bool sorted_flag[2] = { false, true };
    const char* banner[2] = {
        "iterating out of order ... ",
        "iterating in order ... "
    };

    for (int run = 0; run < 2; ++run) {
        fputs(banner[run], stderr);

        const clock_t started = clock();
        for (size_t pass = 0; pass < passes; ++pass) {
            hattrie_iter_t* cursor = hattrie_iter_begin(trie, sorted_flag[run]);
            for (; !hattrie_iter_finished(cursor); hattrie_iter_next(cursor)) {
                /* walk only; the entries themselves are not inspected */
            }
            hattrie_iter_free(cursor);
        }
        const clock_t stopped = clock();

        fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (stopped - started) / (double) CLOCKS_PER_SEC);
    }

    hattrie_free(trie);

    return 0;
}
示例#3
0
/*
 * Checks that a key made of non-ASCII bytes (0x81 0x70) can be stored in a
 * hat-trie and read back with the same value. Progress and the failure
 * message are written to stderr; nothing is returned.
 */
void test_trie_non_ascii()
{
    fprintf(stderr, "checking non-ascii... \n");

    const char* txt = "\x81\x70";
    const size_t len = strlen(txt);
    hattrie_t* T = hattrie_create();

    value_t* u = hattrie_get(T, txt, len);
    *u = 10;

    /*
     * hattrie_tryget() reports a missing key with NULL, so test the pointer
     * first: a failed store must print the message below, not crash.
     */
    u = hattrie_tryget(T, txt, len);
    if (u == NULL || *u != 10) {
        fprintf(stderr, "can't store non-ascii strings\n");
    }
    hattrie_free(T);

    fprintf(stderr, "done.\n");
}
示例#4
0
/*
 * Prepares the benchmark fixtures: fills xs with n freshly allocated random
 * strings, fills ds with d pointers that each alias a randomly chosen entry of
 * xs, and creates the two containers T and M. Progress goes to stderr.
 */
void setup()
{
    fprintf(stderr, "generating %zu keys ... ", n);

    xs = malloc(n * sizeof(char*));
    ds = malloc(d * sizeof(char*));

    /* Each key is m_low plus a random offset below (m_high - m_low) long. */
    size_t k = 0;
    while (k < n) {
        size_t len = m_low + rand() % (m_high - m_low);
        xs[k] = malloc(len + 1);
        randstr(xs[k], len);
        ++k;
    }

    /* ds only borrows pointers from xs; it allocates no strings of its own. */
    size_t j = 0;
    while (j < d) {
        size_t pick = rand()%n;
        ds[j] = xs[pick];
        ++j;
    }

    T = hattrie_create();
    M = str_map_create();
    fprintf(stderr, "done.\n");
}
示例#5
0
文件: zone-tree.c 项目: idtek/knot
/* Creates a zone tree; the tree is whatever hattrie_create() hands back. */
zone_tree_t* zone_tree_create()
{
	zone_tree_t *tree = hattrie_create();

	return tree;
}
示例#6
0
文件: hattrie.c 项目: idtek/knot
/*
 * TAP-style test of the hat-trie: inserts 100000 random keys, then checks
 * exact lookup, lesser-or-equal lookup, find-next, and both unsorted and
 * sorted iteration. Results are reported through ok()/is_int()/diag().
 * argc and argv are not used. Always returns 0.
 */
int main(int argc, char *argv[])
{
	plan_lazy();

	/* Generate random keys (the generator is seeded from the current time). */
	srand(time(NULL));
	unsigned key_count = 100000;
	char **keys = malloc(sizeof(char*) * key_count);
	for (unsigned i = 0; i < key_count; ++i) {
		keys[i] = str_key_rand(KEY_MAXLEN);
	}

	/* Sort the keys; the find-next check below compares against keys[i + 1]. */
	str_key_sort(keys, key_count);

	/* Create trie */
	value_t *val = NULL;
	hattrie_t *trie = hattrie_create();
	ok(trie != NULL, "hattrie: create");

	/*
	 * Insert keys. The key length is strlen + 1, so the terminating NUL is part
	 * of the stored key. Each slot's value is the pointer to its key string.
	 */
	bool passed = true;
	size_t inserted = 0;
	for (unsigned i = 0; i < key_count; ++i) {
		val = hattrie_get(trie, keys[i], strlen(keys[i]) + 1);
		if (!val) {
			passed = false;
			break;
		}
		/* Count only slots that were still NULL (presumably first insertions -- duplicate keys reuse their slot). */
		if (*val == NULL) {
			*val = keys[i];
			++inserted;
		}
	}
	ok(passed, "hattrie: insert");

	/* Check total insertions against trie weight. */
	is_int(hattrie_weight(trie), inserted, "hattrie: trie weight matches insertions");

	/* Build order-index. */
	hattrie_build_index(trie);

	/* Lookup all keys: accept the identical pointer or an equal string (covers duplicate keys). */
	passed = true;
	for (unsigned i = 0; i < key_count; ++i) {
		val = hattrie_tryget(trie, keys[i], strlen(keys[i]) + 1);
		if (val && (*val == keys[i] || strcmp(*val, keys[i]) == 0)) {
			continue;
		} else {
			diag("hattrie: mismatch on element '%u'", i);
			passed = false;
			break;
		}
	}
	ok(passed, "hattrie: lookup all keys");

	/* Lesser or equal lookup. On the first failure, dump up to 10 neighbouring keys on each side and stop. */
	passed = true;
	for (unsigned i = 0; i < key_count; ++i) {
		if (!str_key_find_leq(trie, keys, i, key_count)) {
			passed = false;
			for (int off = -10; off < 10; ++off) {
				int k = (int)i + off;
				/* Skip neighbours that fall outside the key array. */
				if (k < 0 || k >= key_count) {
					continue;
				}
				diag("[%u/%d]: %s%s", i, off, off == 0?">":"",keys[k]);
			}
			break;
		}
	}
	ok(passed, "hattrie: find lesser or equal for all keys");

	/*
	 * Next lookup: the value found for keys[i] must be the pointer keys[i + 1].
	 * NOTE(review): the length passed here is strlen() without the + 1 used for
	 * insertion, and the inner `val` shadows the outer one and is left
	 * uninitialised unless hattrie_find_next() writes it -- confirm both.
	 */
	passed = true;
	for (unsigned i = 0; i < key_count - 1 && passed; ++i) {
		value_t *val;
		hattrie_find_next(trie, keys[i], strlen(keys[i]), &val);
		passed = val && *val == (void *)keys[(i + 1)];
	}
	ok(passed, "hattrie: find next for all keys");

	/* Unsorted iteration: the number of visited entries must equal the insert count. */
	size_t iterated = 0;
	hattrie_iter_t *it = hattrie_iter_begin(trie, false);
	while (!hattrie_iter_finished(it)) {
		++iterated;
		hattrie_iter_next(it);
	}
	is_int(inserted, iterated, "hattrie: unsorted iteration");
	hattrie_iter_free(it);

	/*
	 * Sorted iteration: each key must not compare lower than the previous one.
	 * key_buf holds a copy of the previous key; the strcmp() relies on the
	 * stored keys carrying their terminating NUL (see the insert loop).
	 */
	char key_buf[KEY_MAXLEN] = {'\0'};
	iterated = 0;
	it = hattrie_iter_begin(trie, true);
	while (!hattrie_iter_finished(it)) {
		size_t cur_key_len = 0;
		const char *cur_key = hattrie_iter_key(it, &cur_key_len);
		if (iterated > 0) { /* Only if previous exists. */
			if (strcmp(key_buf, cur_key) > 0) {
				diag("'%s' <= '%s' FAIL\n", key_buf, cur_key);
				break;
			}
		}
		++iterated;
		memcpy(key_buf, cur_key, cur_key_len);
		hattrie_iter_next(it);
	}
	is_int(inserted, iterated, "hattrie: sorted iteration");
	hattrie_iter_free(it);

	/* Cleanup: free every key string, the key array and the trie. */
	for (unsigned i = 0; i < key_count; ++i) {
		free(keys[i]);
	}
	free(keys);
	hattrie_free(trie);
	return 0;
}
示例#7
0
int main(int argc, char* argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: bam-summarize reads.bam\n");
        exit(EXIT_FAILURE);
    }

    samfile_t* f = samopen(argv[1], "rb", NULL);
    if (f == NULL) {
        fprintf(stderr, "can't open bam file %s\n", argv[1]);
        exit(1);
    }

    bam1_t* b = bam_init1();


    hattrie_t* T = hattrie_create();

    char* qname = NULL;
    size_t qname_size = 0;

    size_t j, n = 0;
    uint32_t* cigar;
    uint32_t cigar_op, cigar_len;

    read_stat_t** val;

    while (samread(f, b) >= 0) {
        if (++n % 1000000 == 0) {
            fprintf(stderr, "\t%zu alignments\n", n);
        }

        bool perfect = true;
        bool spliced = false;
        bool gapped  = false;

        cigar = bam1_cigar(b);
        for (j = 0; j < b->core.n_cigar; ++j) {
            cigar_op  = cigar[j] & BAM_CIGAR_MASK;
            cigar_len = cigar[j] >> BAM_CIGAR_SHIFT;

            if (cigar_op == BAM_CREF_SKIP) {
                if (cigar_len < min_splice_length) gapped = true;
                else                               spliced = true;
            }
            else if (cigar_op != BAM_CMATCH)  perfect = false;

            if (cigar_op == BAM_CSOFT_CLIP || cigar_op == BAM_CHARD_CLIP) break;
        }

        /* Skip any clipped alignments. We don't want your kind! */
        if (cigar_op == BAM_CSOFT_CLIP || cigar_op == BAM_CHARD_CLIP) continue;

        /* Hack the read to include mate information. */
        if (b->core.flag & BAM_FPAIRED) {
            if (qname_size < b->core.l_qname + 3) {
                qname_size = b->core.l_qname + 3;
                qname = realloc(qname, qname_size);
            }
            memcpy(qname, bam1_qname(b), b->core.l_qname);

            if (b->core.flag & BAM_FREAD1) {
                qname[b->core.l_qname]     = '/';
                qname[b->core.l_qname + 1] = '2';
                qname[b->core.l_qname + 2] = '\0';
            }
            else {
                qname[b->core.l_qname]     = '/';
                qname[b->core.l_qname + 1] = '1';
                qname[b->core.l_qname + 2] = '\0';
            }

            val = (read_stat_t**) hattrie_get(T, qname, b->core.l_qname + 2);
        }
        else {
            val = (read_stat_t**) hattrie_get(T, bam1_qname(b), b->core.l_qname);
        }


        if (*val == NULL) {
            *val = malloc(sizeof(read_stat_t));
            memset(*val, 0, sizeof(read_stat_t));
        }

        (*val)->aln_count++;
        if (perfect) {
            if (spliced) (*val)->spliced_perfect_cnt++;
            else         (*val)->unspliced_perfect_cnt++;
        }

        if (spliced) (*val)->spliced_cnt++;
        if (gapped) (*val)->gapped_cnt++;
    }

    printf("alignment_count\t%zu\n", n);
    printf("read_count\t%zu\n", hattrie_size(T));


    /* print stats from the table */

    uint32_t multi_count = 0;

    uint32_t unspliced_perfect_cnt = 0;
    uint32_t spliced_perfect_cnt = 0;

    uint32_t spliced_cnt = 0;
    uint32_t gapped_cnt = 0;

    /* excluding multireads */
    uint32_t unique_unspliced_perfect_cnt = 0;
    uint32_t unique_spliced_perfect_cnt = 0;

    uint32_t unique_spliced_cnt = 0;
    uint32_t unique_gapped_cnt = 0;



    hattrie_iter_t* i;
    for (i = hattrie_iter_begin(T);
         !hattrie_iter_finished(i);
         hattrie_iter_next(i))
    {
        val = (read_stat_t**) hattrie_iter_val(i);

        if ((*val)->aln_count == 1) {
            unique_unspliced_perfect_cnt += (*val)->unspliced_perfect_cnt;
            unique_spliced_perfect_cnt   += (*val)->spliced_perfect_cnt;

            unique_spliced_cnt += (*val)->spliced_cnt;
            unique_gapped_cnt  += (*val)->gapped_cnt;
        }
        else multi_count++;

        unspliced_perfect_cnt += (*val)->unspliced_perfect_cnt;
        spliced_perfect_cnt   += (*val)->spliced_perfect_cnt;

        spliced_cnt += (*val)->spliced_cnt;
        gapped_cnt  += (*val)->gapped_cnt;
    }

    hattrie_iter_free(i);


    printf("multi_count\t%u\n", multi_count);
    printf("unspliced_perfect_cnt\t%u\n", unspliced_perfect_cnt);
    printf("spliced_perfect_cnt\t%u\n", spliced_perfect_cnt);
    printf("spliced_cnt\t%u\n", spliced_cnt);
    printf("gapped_cnt\t%u\n", gapped_cnt);

    printf("unique_unspliced_perfect_cnt\t%u\n", unique_unspliced_perfect_cnt);
    printf("unique_spliced_perfect_cnt\t%u\n", unique_spliced_perfect_cnt);
    printf("unique_spliced_cnt\t%u\n", unique_spliced_cnt);
    printf("unique_gapped_cnt\t%u\n", unique_gapped_cnt);


    /* free the table */
    for (i = hattrie_iter_begin(T);
         !hattrie_iter_finished(i);
         hattrie_iter_next(i))
    {
        free(* (read_stat_t**) hattrie_iter_val(i));
    }

    hattrie_iter_free(i);
    hattrie_free(T);
    free(qname);

    bam_destroy1(b);

    return 0;
}
示例#8
0
/*
 * Default constructor: stores a hat-trie obtained from hattrie_create() in `t`.
 * NOTE(review): `t` is presumably a hattrie_t* data member of AlnIndex; its
 * declaration is not visible here -- confirm, and check who frees it.
 */
AlnIndex::AlnIndex()
{
    t = hattrie_create();
}
示例#9
0
/* Creates a zone tree; the tree is whatever hattrie_create() hands back. */
knot_zone_tree_t* knot_zone_tree_create()
{
	knot_zone_tree_t *tree = hattrie_create();

	return tree;
}