long AlnIndex::add(const char* key) { value_t* val = hattrie_get(t, key, strlen(key)); if (*val == 0) { *val = hattrie_size(t); } return *val; }
/*
 * Store `node` in `tree`, keyed by the lookup form of its owner name.
 *
 * dname_lf() fills a local buffer; its first byte is passed as the key
 * length and the bytes after it as the key itself.
 *
 * Asserts that tree, node and node->owner are all non-NULL.
 * Always returns KNOT_EOK.
 */
int knot_zone_tree_insert(knot_zone_tree_t *tree, knot_node_t *node)
{
    assert(tree && node && node->owner);

    char lookup[DNAME_LFT_MAXLEN];
    dname_lf(lookup, node->owner, sizeof(lookup));

    /* lookup[0] is the length prefix, the key proper starts at lookup[1]. */
    *hattrie_get(tree, lookup + 1, *lookup) = node;

    return KNOT_EOK;
}
int main() { hattrie_t* T = hattrie_create(); const size_t n = 1000000; // how many strings const size_t m_low = 50; // minimum length of each string const size_t m_high = 500; // maximum length of each string char x[501]; size_t i, m; for (i = 0; i < n; ++i) { m = m_low + rand() % (m_high - m_low); randstr(x, m); *hattrie_get(T, x, m) = 1; } hattrie_iter_t* it; clock_t t0, t; const size_t repetitions = 100; size_t r; /* iterate in unsorted order */ fprintf(stderr, "iterating out of order ... "); t0 = clock(); for (r = 0; r < repetitions; ++r) { it = hattrie_iter_begin(T, false); while (!hattrie_iter_finished(it)) { hattrie_iter_next(it); } hattrie_iter_free(it); } t = clock(); fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC); /* iterate in sorted order */ fprintf(stderr, "iterating in order ... "); t0 = clock(); for (r = 0; r < repetitions; ++r) { it = hattrie_iter_begin(T, true); while (!hattrie_iter_finished(it)) { hattrie_iter_next(it); } hattrie_iter_free(it); } t = clock(); fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC); hattrie_free(T); return 0; }
/*
 * Store `node` in `tree`, keyed by the lookup form of its owner name.
 *
 * Returns KNOT_EINVAL when `tree` is NULL, KNOT_EOK otherwise.
 * `node` and `node->owner` are only checked by assertion.
 */
int zone_tree_insert(zone_tree_t *tree, zone_node_t *node)
{
    if (tree == NULL) {
        return KNOT_EINVAL;
    }

    assert(tree && node && node->owner);

    /* knot_dname_lf() fills the buffer; byte 0 is used as the key length
     * and the key bytes follow it. */
    uint8_t lookup[KNOT_DNAME_MAXLEN];
    knot_dname_lf(lookup, node->owner, NULL);

    const uint8_t key_len = lookup[0];
    char *key = (char*)lookup + 1;
    *hattrie_get(tree, key, key_len) = node;

    return KNOT_EOK;
}
/*
 * Check that a key containing a byte above 0x7f can be stored in a
 * hat-trie and read back.
 *
 * Inserts the two-byte key "\x81\x70" with value 10, looks it up again with
 * hattrie_tryget() and reports on stderr if the value is missing or wrong.
 */
void test_trie_non_ascii()
{
    fprintf(stderr, "checking non-ascii... \n");

    hattrie_t* T = hattrie_create();

    /* String literals are read-only; keep the pointer const. */
    const char* txt = "\x81\x70";
    const size_t len = strlen(txt);

    value_t* u = hattrie_get(T, txt, len);
    *u = 10;

    /* hattrie_tryget() yields NULL for a key it cannot find, so the result
     * must be checked before it is dereferenced; a missing key is exactly
     * the failure this test is meant to report. */
    u = hattrie_tryget(T, txt, len);
    if (u == NULL || *u != 10) {
        fprintf(stderr, "can't store non-ascii strings\n");
    }

    hattrie_free(T);

    fprintf(stderr, "done.\n");
}
/*
 * Build a new trie holding every key of `T`.
 *
 * The new trie is created with T's bsize and mm. Each value is passed
 * through `nval` before being stored; when `nval` is NULL,
 * hattrie_setval is used instead.
 *
 * Returns the newly created trie.
 */
hattrie_t* hattrie_dup(const hattrie_t* T, value_t (*nval)(value_t))
{
    hattrie_t *copy = hattrie_create_n(T->bsize, &T->mm);

    /* No transform supplied: fall back to the default one. */
    if (!nval) {
        nval = hattrie_setval;
    }

    /*! \todo could be probably implemented faster */
    hattrie_iter_t *it;
    for (it = hattrie_iter_begin(T, false);
         !hattrie_iter_finished(it);
         hattrie_iter_next(it)) {
        size_t key_len = 0;
        const char *key = hattrie_iter_key(it, &key_len);
        *hattrie_get(copy, key, key_len) = nval(*hattrie_iter_val(it));
    }
    hattrie_iter_free(it);

    return copy;
}
void test_hattrie_insert() { fprintf(stderr, "inserting %zu keys ... \n", k); size_t i, j; value_t* u; value_t v; for (j = 0; j < k; ++j) { i = rand() % n; v = 1 + str_map_get(M, xs[i], strlen(xs[i])); str_map_set(M, xs[i], strlen(xs[i]), v); u = hattrie_get(T, xs[i], strlen(xs[i])); *u += 1; if (*u != v) { fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n", *u, v); } } fprintf(stderr, "sizeof: %zu\n", hattrie_sizeof(T)); fprintf(stderr, "deleting %zu keys ... \n", d); for (j = 0; j < d; ++j) { str_map_del(M, ds[j], strlen(ds[j])); hattrie_del(T, ds[j], strlen(ds[j])); u = hattrie_tryget(T, ds[j], strlen(ds[j])); if (u) { fprintf(stderr, "[error] item %zu still found in trie after delete\n", j); } } fprintf(stderr, "done.\n"); }
int main(int argc, char *argv[]) { plan_lazy(); /* Random keys. */ srand(time(NULL)); unsigned key_count = 100000; char **keys = malloc(sizeof(char*) * key_count); for (unsigned i = 0; i < key_count; ++i) { keys[i] = str_key_rand(KEY_MAXLEN); } /* Sort random keys. */ str_key_sort(keys, key_count); /* Create trie */ value_t *val = NULL; hattrie_t *trie = hattrie_create(); ok(trie != NULL, "hattrie: create"); /* Insert keys */ bool passed = true; size_t inserted = 0; for (unsigned i = 0; i < key_count; ++i) { val = hattrie_get(trie, keys[i], strlen(keys[i]) + 1); if (!val) { passed = false; break; } if (*val == NULL) { *val = keys[i]; ++inserted; } } ok(passed, "hattrie: insert"); /* Check total insertions against trie weight. */ is_int(hattrie_weight(trie), inserted, "hattrie: trie weight matches insertions"); /* Build order-index. */ hattrie_build_index(trie); /* Lookup all keys */ passed = true; for (unsigned i = 0; i < key_count; ++i) { val = hattrie_tryget(trie, keys[i], strlen(keys[i]) + 1); if (val && (*val == keys[i] || strcmp(*val, keys[i]) == 0)) { continue; } else { diag("hattrie: mismatch on element '%u'", i); passed = false; break; } } ok(passed, "hattrie: lookup all keys"); /* Lesser or equal lookup. */ passed = true; for (unsigned i = 0; i < key_count; ++i) { if (!str_key_find_leq(trie, keys, i, key_count)) { passed = false; for (int off = -10; off < 10; ++off) { int k = (int)i + off; if (k < 0 || k >= key_count) { continue; } diag("[%u/%d]: %s%s", i, off, off == 0?">":"",keys[k]); } break; } } ok(passed, "hattrie: find lesser or equal for all keys"); /* Next lookup. 
*/ passed = true; for (unsigned i = 0; i < key_count - 1 && passed; ++i) { value_t *val; hattrie_find_next(trie, keys[i], strlen(keys[i]), &val); passed = val && *val == (void *)keys[(i + 1)]; } ok(passed, "hattrie: find next for all keys"); /* Unsorted iteration */ size_t iterated = 0; hattrie_iter_t *it = hattrie_iter_begin(trie, false); while (!hattrie_iter_finished(it)) { ++iterated; hattrie_iter_next(it); } is_int(inserted, iterated, "hattrie: unsorted iteration"); hattrie_iter_free(it); /* Sorted iteration. */ char key_buf[KEY_MAXLEN] = {'\0'}; iterated = 0; it = hattrie_iter_begin(trie, true); while (!hattrie_iter_finished(it)) { size_t cur_key_len = 0; const char *cur_key = hattrie_iter_key(it, &cur_key_len); if (iterated > 0) { /* Only if previous exists. */ if (strcmp(key_buf, cur_key) > 0) { diag("'%s' <= '%s' FAIL\n", key_buf, cur_key); break; } } ++iterated; memcpy(key_buf, cur_key, cur_key_len); hattrie_iter_next(it); } is_int(inserted, iterated, "hattrie: sorted iteration"); hattrie_iter_free(it); /* Cleanup */ for (unsigned i = 0; i < key_count; ++i) { free(keys[i]); } free(keys); hattrie_free(trie); return 0; }
int main(int argc, char* argv[]) { if (argc < 2) { fprintf(stderr, "Usage: bam-summarize reads.bam\n"); exit(EXIT_FAILURE); } samfile_t* f = samopen(argv[1], "rb", NULL); if (f == NULL) { fprintf(stderr, "can't open bam file %s\n", argv[1]); exit(1); } bam1_t* b = bam_init1(); hattrie_t* T = hattrie_create(); char* qname = NULL; size_t qname_size = 0; size_t j, n = 0; uint32_t* cigar; uint32_t cigar_op, cigar_len; read_stat_t** val; while (samread(f, b) >= 0) { if (++n % 1000000 == 0) { fprintf(stderr, "\t%zu alignments\n", n); } bool perfect = true; bool spliced = false; bool gapped = false; cigar = bam1_cigar(b); for (j = 0; j < b->core.n_cigar; ++j) { cigar_op = cigar[j] & BAM_CIGAR_MASK; cigar_len = cigar[j] >> BAM_CIGAR_SHIFT; if (cigar_op == BAM_CREF_SKIP) { if (cigar_len < min_splice_length) gapped = true; else spliced = true; } else if (cigar_op != BAM_CMATCH) perfect = false; if (cigar_op == BAM_CSOFT_CLIP || cigar_op == BAM_CHARD_CLIP) break; } /* Skip any clipped alignments. We don't want your kind! */ if (cigar_op == BAM_CSOFT_CLIP || cigar_op == BAM_CHARD_CLIP) continue; /* Hack the read to include mate information. 
*/ if (b->core.flag & BAM_FPAIRED) { if (qname_size < b->core.l_qname + 3) { qname_size = b->core.l_qname + 3; qname = realloc(qname, qname_size); } memcpy(qname, bam1_qname(b), b->core.l_qname); if (b->core.flag & BAM_FREAD1) { qname[b->core.l_qname] = '/'; qname[b->core.l_qname + 1] = '2'; qname[b->core.l_qname + 2] = '\0'; } else { qname[b->core.l_qname] = '/'; qname[b->core.l_qname + 1] = '1'; qname[b->core.l_qname + 2] = '\0'; } val = (read_stat_t**) hattrie_get(T, qname, b->core.l_qname + 2); } else { val = (read_stat_t**) hattrie_get(T, bam1_qname(b), b->core.l_qname); } if (*val == NULL) { *val = malloc(sizeof(read_stat_t)); memset(*val, 0, sizeof(read_stat_t)); } (*val)->aln_count++; if (perfect) { if (spliced) (*val)->spliced_perfect_cnt++; else (*val)->unspliced_perfect_cnt++; } if (spliced) (*val)->spliced_cnt++; if (gapped) (*val)->gapped_cnt++; } printf("alignment_count\t%zu\n", n); printf("read_count\t%zu\n", hattrie_size(T)); /* print stats from the table */ uint32_t multi_count = 0; uint32_t unspliced_perfect_cnt = 0; uint32_t spliced_perfect_cnt = 0; uint32_t spliced_cnt = 0; uint32_t gapped_cnt = 0; /* excluding multireads */ uint32_t unique_unspliced_perfect_cnt = 0; uint32_t unique_spliced_perfect_cnt = 0; uint32_t unique_spliced_cnt = 0; uint32_t unique_gapped_cnt = 0; hattrie_iter_t* i; for (i = hattrie_iter_begin(T); !hattrie_iter_finished(i); hattrie_iter_next(i)) { val = (read_stat_t**) hattrie_iter_val(i); if ((*val)->aln_count == 1) { unique_unspliced_perfect_cnt += (*val)->unspliced_perfect_cnt; unique_spliced_perfect_cnt += (*val)->spliced_perfect_cnt; unique_spliced_cnt += (*val)->spliced_cnt; unique_gapped_cnt += (*val)->gapped_cnt; } else multi_count++; unspliced_perfect_cnt += (*val)->unspliced_perfect_cnt; spliced_perfect_cnt += (*val)->spliced_perfect_cnt; spliced_cnt += (*val)->spliced_cnt; gapped_cnt += (*val)->gapped_cnt; } hattrie_iter_free(i); printf("multi_count\t%u\n", multi_count); printf("unspliced_perfect_cnt\t%u\n", 
unspliced_perfect_cnt); printf("spliced_perfect_cnt\t%u\n", spliced_perfect_cnt); printf("spliced_cnt\t%u\n", spliced_cnt); printf("gapped_cnt\t%u\n", gapped_cnt); printf("unique_unspliced_perfect_cnt\t%u\n", unique_unspliced_perfect_cnt); printf("unique_spliced_perfect_cnt\t%u\n", unique_spliced_perfect_cnt); printf("unique_spliced_cnt\t%u\n", unique_spliced_cnt); printf("unique_gapped_cnt\t%u\n", unique_gapped_cnt); /* free the table */ for (i = hattrie_iter_begin(T); !hattrie_iter_finished(i); hattrie_iter_next(i)) { free(* (read_stat_t**) hattrie_iter_val(i)); } hattrie_iter_free(i); hattrie_free(T); free(qname); bam_destroy1(b); return 0; }