Esempio n. 1
0
long AlnIndex::add(const char* key)
{
    value_t* val = hattrie_get(t, key, strlen(key));
    if (*val == 0) {
        *val = hattrie_size(t);
    }
    return *val;
}
Esempio n. 2
0
/*
 * Store `node` in `tree`, keyed by the lookup form of node->owner.
 *
 * dname_lf() fills `lf`; its first byte is passed to hattrie_get() as the
 * key length and the bytes after it as the key. Always returns KNOT_EOK;
 * invalid arguments are only caught by the assertion.
 */
int knot_zone_tree_insert(knot_zone_tree_t *tree, knot_node_t *node)
{
	assert(tree && node && node->owner);
	char lf[DNAME_LFT_MAXLEN];
	dname_lf(lf, node->owner, sizeof(lf));

	/* Plain char may be signed: read the length byte as unsigned so that a
	 * value above 127 is not sign-extended into a huge key length. */
	*hattrie_get(tree, lf + 1, (unsigned char)lf[0]) = node;
	return KNOT_EOK;
}
/* Traverse every entry of T `repetitions` times, in sorted or unsorted
 * order, and return the elapsed clock() time in seconds. */
static double time_full_traversals(hattrie_t* T, bool sorted, size_t repetitions)
{
    const clock_t started = clock();

    for (size_t pass = 0; pass < repetitions; ++pass) {
        hattrie_iter_t* cursor = hattrie_iter_begin(T, sorted);
        for (; !hattrie_iter_finished(cursor); hattrie_iter_next(cursor)) {
            /* nothing to do: only the traversal itself is being timed */
        }
        hattrie_iter_free(cursor);
    }

    const clock_t stopped = clock();
    return (double) (stopped - started) / (double) CLOCKS_PER_SEC;
}

/* Iteration benchmark: fill a trie with random keys, then time repeated
 * unsorted and sorted traversals, reporting both timings on stderr. */
int main()
{
    const size_t key_count   = 1000000; /* how many strings */
    const size_t len_min     = 50;      /* minimum length of each string */
    const size_t len_max     = 500;     /* maximum length of each string */
    const size_t repetitions = 100;     /* traversals per measurement */
    char key[501];

    hattrie_t* T = hattrie_create();

    /* insert key_count random keys, each of length in [len_min, len_max) */
    size_t inserted = 0;
    while (inserted < key_count) {
        const size_t len = len_min + rand() % (len_max - len_min);
        randstr(key, len);
        *hattrie_get(T, key, len) = 1;
        ++inserted;
    }

    /* iterate in unsorted order */
    fprintf(stderr, "iterating out of order ... ");
    const double unsorted_secs = time_full_traversals(T, false, repetitions);
    fprintf(stderr, "finished. (%0.2f seconds)\n", unsorted_secs);


    /* iterate in sorted order */
    fprintf(stderr, "iterating in order ... ");
    const double sorted_secs = time_full_traversals(T, true, repetitions);
    fprintf(stderr, "finished. (%0.2f seconds)\n", sorted_secs);


    hattrie_free(T);

    return 0;
}
Esempio n. 4
0
/*
 * Insert `node` into `tree` under the lookup form of its owner name.
 *
 * Returns KNOT_EINVAL when `tree` is NULL and KNOT_EOK otherwise. `node`
 * and node->owner are only checked by assertion.
 */
int zone_tree_insert(zone_tree_t *tree, zone_node_t *node)
{
	if (!tree) {
		return KNOT_EINVAL;
	}

	assert(tree && node && node->owner);

	/* knot_dname_lf() fills the buffer; byte 0 is passed on as the key
	 * length and the bytes after it as the key. */
	uint8_t lookup[KNOT_DNAME_MAXLEN];
	knot_dname_lf(lookup, node->owner, NULL);

	const uint8_t key_len = lookup[0];
	char *key = (char *)lookup + 1;
	*hattrie_get(tree, key, key_len) = node;

	return KNOT_EOK;
}
Esempio n. 5
0
/*
 * Check that a key made of bytes outside the ASCII range can be stored in
 * a trie and read back.
 *
 * Stores the value 10 under the two-byte key "\x81\x70", looks the key up
 * again with hattrie_tryget() and reports on stderr if the value is missing
 * or different.
 */
void test_trie_non_ascii()
{
    fprintf(stderr, "checking non-ascii... \n");

    value_t* u;
    hattrie_t* T = hattrie_create();
    char* txt = "\x81\x70";

    u = hattrie_get(T, txt, strlen(txt));
    *u = 10;

    /* hattrie_tryget() yields NULL when the key is absent, which is exactly
     * the failure this test looks for, so check before dereferencing. */
    u = hattrie_tryget(T, txt, strlen(txt));
    if (u == NULL || *u != 10) {
        fprintf(stderr, "can't store non-ascii strings\n");
    }
    hattrie_free(T);

    fprintf(stderr, "done.\n");
}
Esempio n. 6
0
/*
 * Build a new trie containing every key of T.
 *
 * The new trie is created with hattrie_create_n() from T->bsize and a
 * pointer to T->mm. Each value of T is passed through `nval` and the result
 * is stored under the same key; when `nval` is NULL, hattrie_setval is used
 * instead. Returns the new trie.
 */
hattrie_t* hattrie_dup(const hattrie_t* T, value_t (*nval)(value_t))
{
    /* fall back to the default value mapping when none was supplied */
    value_t (*convert)(value_t) = nval ? nval : hattrie_setval;

    hattrie_t *copy = hattrie_create_n(T->bsize, &T->mm);

    /*! \todo could be probably implemented faster */

    size_t key_len = 0;
    hattrie_iter_t *cursor = hattrie_iter_begin(T, false);
    for (; !hattrie_iter_finished(cursor); hattrie_iter_next(cursor)) {
        const char *key = hattrie_iter_key(cursor, &key_len);
        value_t mapped = convert(*hattrie_iter_val(cursor));
        *hattrie_get(copy, key, key_len) = mapped;
    }
    hattrie_iter_free(cursor);

    return copy;
}
Esempio n. 7
0
/*
 * Exercise insertion and deletion on the trie T, using the map M as the
 * reference.
 *
 * Insert phase: k times, pick a random entry of xs, increment its tally in
 * both M and T, and report on stderr when the two tallies differ.
 * Delete phase: remove each of the d keys in ds from both containers and
 * report on stderr when hattrie_tryget() still finds the key in T.
 */
void test_hattrie_insert()
{
    fprintf(stderr, "inserting %zu keys ... \n", k);

    for (size_t round = 0; round < k; ++round) {
        const size_t pick = rand() % n;
        const size_t len  = strlen(xs[pick]);

        /* bump the expected tally in the reference map */
        const value_t expected = 1 + str_map_get(M, xs[pick], len);
        str_map_set(M, xs[pick], len, expected);

        /* bump the tally held by the trie */
        value_t* slot = hattrie_get(T, xs[pick], len);
        *slot += 1;

        if (*slot != expected) {
            fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
                    *slot, expected);
        }
    }

    fprintf(stderr, "sizeof: %zu\n", hattrie_sizeof(T));

    fprintf(stderr, "deleting %zu keys ... \n", d);
    for (size_t victim = 0; victim < d; ++victim) {
        const size_t len = strlen(ds[victim]);

        str_map_del(M, ds[victim], len);
        hattrie_del(T, ds[victim], len);

        /* a deleted key must no longer be retrievable */
        if (hattrie_tryget(T, ds[victim], len)) {
            fprintf(stderr, "[error] item %zu still found in trie after delete\n",
                    victim);
        }
    }

    fprintf(stderr, "done.\n");
}
Esempio n. 8
0
File: hattrie.c Progetto: idtek/knot
/*
 * hat-trie test driver.
 *
 * Generates key_count random keys, sorts them, inserts them into a trie
 * (key length includes the terminating NUL) and then checks: trie weight
 * against the number of distinct insertions, exact lookup of every key,
 * lesser-or-equal lookup, next-key lookup, and unsorted and sorted
 * iteration. Results are reported through ok()/is_int()/diag().
 *
 * Returns 0 after running all checks, or 1 if the key array cannot be
 * allocated.
 */
int main(int argc, char *argv[])
{
	plan_lazy();

	/* Random keys. */
	srand(time(NULL));
	unsigned key_count = 100000;
	char **keys = malloc(sizeof(char*) * key_count);
	if (keys == NULL) {
		/* Without the key array nothing below can run. */
		diag("hattrie: failed to allocate key array");
		return 1;
	}
	for (unsigned i = 0; i < key_count; ++i) {
		keys[i] = str_key_rand(KEY_MAXLEN);
	}

	/* Sort random keys. */
	str_key_sort(keys, key_count);

	/* Create trie */
	value_t *val = NULL;
	hattrie_t *trie = hattrie_create();
	ok(trie != NULL, "hattrie: create");

	/* Insert keys */
	bool passed = true;
	size_t inserted = 0;
	for (unsigned i = 0; i < key_count; ++i) {
		val = hattrie_get(trie, keys[i], strlen(keys[i]) + 1);
		if (!val) {
			passed = false;
			break;
		}
		/* Count only the first occurrence of a key; duplicates find the
		 * slot already filled. */
		if (*val == NULL) {
			*val = keys[i];
			++inserted;
		}
	}
	ok(passed, "hattrie: insert");

	/* Check total insertions against trie weight. */
	is_int(hattrie_weight(trie), inserted, "hattrie: trie weight matches insertions");

	/* Build order-index. */
	hattrie_build_index(trie);

	/* Lookup all keys */
	passed = true;
	for (unsigned i = 0; i < key_count; ++i) {
		val = hattrie_tryget(trie, keys[i], strlen(keys[i]) + 1);
		/* Accept either the same pointer or an equal string (a duplicate
		 * key stores the pointer of its first occurrence). */
		if (val && (*val == keys[i] || strcmp(*val, keys[i]) == 0)) {
			continue;
		} else {
			diag("hattrie: mismatch on element '%u'", i);
			passed = false;
			break;
		}
	}
	ok(passed, "hattrie: lookup all keys");

	/* Lesser or equal lookup. */
	passed = true;
	for (unsigned i = 0; i < key_count; ++i) {
		if (!str_key_find_leq(trie, keys, i, key_count)) {
			passed = false;
			/* Dump the neighbouring keys to help diagnose the failure. */
			for (int off = -10; off < 10; ++off) {
				int k = (int)i + off;
				if (k < 0 || (unsigned)k >= key_count) {
					continue;
				}
				diag("[%u/%d]: %s%s", i, off, off == 0?">":"",keys[k]);
			}
			break;
		}
	}
	ok(passed, "hattrie: find lesser or equal for all keys");

	/* Next lookup. */
	passed = true;
	for (unsigned i = 0; i < key_count - 1 && passed; ++i) {
		/* Start from NULL so a lookup that stores nothing is reported as a
		 * failure instead of reading an indeterminate pointer. */
		value_t *next_val = NULL;
		hattrie_find_next(trie, keys[i], strlen(keys[i]), &next_val);
		passed = next_val && *next_val == (void *)keys[(i + 1)];
	}
	ok(passed, "hattrie: find next for all keys");

	/* Unsorted iteration */
	size_t iterated = 0;
	hattrie_iter_t *it = hattrie_iter_begin(trie, false);
	while (!hattrie_iter_finished(it)) {
		++iterated;
		hattrie_iter_next(it);
	}
	is_int(inserted, iterated, "hattrie: unsorted iteration");
	hattrie_iter_free(it);

	/* Sorted iteration. */
	char key_buf[KEY_MAXLEN] = {'\0'};
	iterated = 0;
	it = hattrie_iter_begin(trie, true);
	while (!hattrie_iter_finished(it)) {
		size_t cur_key_len = 0;
		const char *cur_key = hattrie_iter_key(it, &cur_key_len);
		if (iterated > 0) { /* Only if previous exists. */
			if (strcmp(key_buf, cur_key) > 0) {
				diag("'%s' <= '%s' FAIL\n", key_buf, cur_key);
				break;
			}
		}
		++iterated;
		/* Remember the current key for comparison with the next one. */
		memcpy(key_buf, cur_key, cur_key_len);
		hattrie_iter_next(it);
	}
	is_int(inserted, iterated, "hattrie: sorted iteration");
	hattrie_iter_free(it);

	/* Cleanup */
	for (unsigned i = 0; i < key_count; ++i) {
		free(keys[i]);
	}
	free(keys);
	hattrie_free(trie);
	return 0;
}
Esempio n. 9
0
int main(int argc, char* argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: bam-summarize reads.bam\n");
        exit(EXIT_FAILURE);
    }

    samfile_t* f = samopen(argv[1], "rb", NULL);
    if (f == NULL) {
        fprintf(stderr, "can't open bam file %s\n", argv[1]);
        exit(1);
    }

    bam1_t* b = bam_init1();


    hattrie_t* T = hattrie_create();

    char* qname = NULL;
    size_t qname_size = 0;

    size_t j, n = 0;
    uint32_t* cigar;
    uint32_t cigar_op, cigar_len;

    read_stat_t** val;

    while (samread(f, b) >= 0) {
        if (++n % 1000000 == 0) {
            fprintf(stderr, "\t%zu alignments\n", n);
        }

        bool perfect = true;
        bool spliced = false;
        bool gapped  = false;

        cigar = bam1_cigar(b);
        for (j = 0; j < b->core.n_cigar; ++j) {
            cigar_op  = cigar[j] & BAM_CIGAR_MASK;
            cigar_len = cigar[j] >> BAM_CIGAR_SHIFT;

            if (cigar_op == BAM_CREF_SKIP) {
                if (cigar_len < min_splice_length) gapped = true;
                else                               spliced = true;
            }
            else if (cigar_op != BAM_CMATCH)  perfect = false;

            if (cigar_op == BAM_CSOFT_CLIP || cigar_op == BAM_CHARD_CLIP) break;
        }

        /* Skip any clipped alignments. We don't want your kind! */
        if (cigar_op == BAM_CSOFT_CLIP || cigar_op == BAM_CHARD_CLIP) continue;

        /* Hack the read to include mate information. */
        if (b->core.flag & BAM_FPAIRED) {
            if (qname_size < b->core.l_qname + 3) {
                qname_size = b->core.l_qname + 3;
                qname = realloc(qname, qname_size);
            }
            memcpy(qname, bam1_qname(b), b->core.l_qname);

            if (b->core.flag & BAM_FREAD1) {
                qname[b->core.l_qname]     = '/';
                qname[b->core.l_qname + 1] = '2';
                qname[b->core.l_qname + 2] = '\0';
            }
            else {
                qname[b->core.l_qname]     = '/';
                qname[b->core.l_qname + 1] = '1';
                qname[b->core.l_qname + 2] = '\0';
            }

            val = (read_stat_t**) hattrie_get(T, qname, b->core.l_qname + 2);
        }
        else {
            val = (read_stat_t**) hattrie_get(T, bam1_qname(b), b->core.l_qname);
        }


        if (*val == NULL) {
            *val = malloc(sizeof(read_stat_t));
            memset(*val, 0, sizeof(read_stat_t));
        }

        (*val)->aln_count++;
        if (perfect) {
            if (spliced) (*val)->spliced_perfect_cnt++;
            else         (*val)->unspliced_perfect_cnt++;
        }

        if (spliced) (*val)->spliced_cnt++;
        if (gapped) (*val)->gapped_cnt++;
    }

    printf("alignment_count\t%zu\n", n);
    printf("read_count\t%zu\n", hattrie_size(T));


    /* print stats from the table */

    uint32_t multi_count = 0;

    uint32_t unspliced_perfect_cnt = 0;
    uint32_t spliced_perfect_cnt = 0;

    uint32_t spliced_cnt = 0;
    uint32_t gapped_cnt = 0;

    /* excluding multireads */
    uint32_t unique_unspliced_perfect_cnt = 0;
    uint32_t unique_spliced_perfect_cnt = 0;

    uint32_t unique_spliced_cnt = 0;
    uint32_t unique_gapped_cnt = 0;



    hattrie_iter_t* i;
    for (i = hattrie_iter_begin(T);
         !hattrie_iter_finished(i);
         hattrie_iter_next(i))
    {
        val = (read_stat_t**) hattrie_iter_val(i);

        if ((*val)->aln_count == 1) {
            unique_unspliced_perfect_cnt += (*val)->unspliced_perfect_cnt;
            unique_spliced_perfect_cnt   += (*val)->spliced_perfect_cnt;

            unique_spliced_cnt += (*val)->spliced_cnt;
            unique_gapped_cnt  += (*val)->gapped_cnt;
        }
        else multi_count++;

        unspliced_perfect_cnt += (*val)->unspliced_perfect_cnt;
        spliced_perfect_cnt   += (*val)->spliced_perfect_cnt;

        spliced_cnt += (*val)->spliced_cnt;
        gapped_cnt  += (*val)->gapped_cnt;
    }

    hattrie_iter_free(i);


    printf("multi_count\t%u\n", multi_count);
    printf("unspliced_perfect_cnt\t%u\n", unspliced_perfect_cnt);
    printf("spliced_perfect_cnt\t%u\n", spliced_perfect_cnt);
    printf("spliced_cnt\t%u\n", spliced_cnt);
    printf("gapped_cnt\t%u\n", gapped_cnt);

    printf("unique_unspliced_perfect_cnt\t%u\n", unique_unspliced_perfect_cnt);
    printf("unique_spliced_perfect_cnt\t%u\n", unique_spliced_perfect_cnt);
    printf("unique_spliced_cnt\t%u\n", unique_spliced_cnt);
    printf("unique_gapped_cnt\t%u\n", unique_gapped_cnt);


    /* free the table */
    for (i = hattrie_iter_begin(T);
         !hattrie_iter_finished(i);
         hattrie_iter_next(i))
    {
        free(* (read_stat_t**) hattrie_iter_val(i));
    }

    hattrie_iter_free(i);
    hattrie_free(T);
    free(qname);

    bam_destroy1(b);

    return 0;
}