nt main(int argc, char *argv[]) { FILE *genome_f; int read_size=0; //the size of the reads int table_factor; //arbitrary int num_of_reads; int bf_table_size = table_factor*num_of_reads; int num_of_hash_func; BloomFilter* bf_unique; //BF for the unique tries // char* output_label=(char *)malloc(50); //label name for the output files hattrie_t* trie_unique; //hattrie that holds the unique reads hattrie_t* trie_repeat; //hattrie that holds the repetetive reads, and the one that has N inside of them. hattrie_t* trie_genome_unique; //put 'accepts' (everything that uniqe BF says yes that it's in genome) into a trie hattrie_t* trie_fp; //triw that holds false negatives set hattrie_t* trie_fn;//trie that holds false positives set // f = fopen(argv[1], "r"); trie_repeat = hattrie_create(); trie_unique = hattrie_create(); make_repeat_and_unique_tries((argv[1], "r"), trie_unique, trie_repeat); table_factor = 10; //arbitrary num_of_reads = line_number/2; bf_table_size = table_factor*num_of_reads; num_of_hash_func = (int) ceil(table_factor*0.69314); //print the keys of the uniqe and repaet tries hattrie_iteration(trie_unique, "unique", argv[3]); hattrie_iteration(trie_repeat, "repeat", argv[3]); //hashing uniqe reads trie using bloom filter bf_unique = bloom_filter_new(bf_table_size, string_hash, num_of_hash_func); hash_trie_into_bf(trie_unique, bf_unique); check_if_trie_in_bf(trie_unique, bf_unique); //create trie for all of the sliding windows in the genome reference which are in the unique reads according to the bf_unique read_size = size-2; trie_genome_unique = hattrie_create(); query_bf_with_genome(bf_unique, genome_f ,trie_genome_unique, read_size); check_if_trie_in_bf(trie_unique, bf_unique); fclose(genome_f); hattrie_iteration(trie_genome_unique, "genome_unique", argv[3]); trie_fp = hattrie_create(); trie_fn = hattrie_create(); printf("start checking for false positive \n"); check_fp(trie_unique,trie_genome_unique, trie_fp); hattrie_iteration(trie_fp, "fp_unique", 
argv[3]); printf("start checking for false negative \n"); check_fn(trie_unique,trie_genome_unique, trie_fn); hattrie_iteration(trie_fn, "fn_unique", argv[3]); bloom_filter_free(bf_unique); free(buffer); hattrie_free(trie_unique); hattrie_free(trie_repeat); hattrie_free(trie_genome_unique); hattrie_free(trie_fn); hattrie_free(trie_fp); return 0; }
int main() { hattrie_t* T = hattrie_create(); const size_t n = 1000000; // how many strings const size_t m_low = 50; // minimum length of each string const size_t m_high = 500; // maximum length of each string char x[501]; size_t i, m; for (i = 0; i < n; ++i) { m = m_low + rand() % (m_high - m_low); randstr(x, m); *hattrie_get(T, x, m) = 1; } hattrie_iter_t* it; clock_t t0, t; const size_t repetitions = 100; size_t r; /* iterate in unsorted order */ fprintf(stderr, "iterating out of order ... "); t0 = clock(); for (r = 0; r < repetitions; ++r) { it = hattrie_iter_begin(T, false); while (!hattrie_iter_finished(it)) { hattrie_iter_next(it); } hattrie_iter_free(it); } t = clock(); fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC); /* iterate in sorted order */ fprintf(stderr, "iterating in order ... "); t0 = clock(); for (r = 0; r < repetitions; ++r) { it = hattrie_iter_begin(T, true); while (!hattrie_iter_finished(it)) { hattrie_iter_next(it); } hattrie_iter_free(it); } t = clock(); fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC); hattrie_free(T); return 0; }
/*
 * Check that a key made of non-ASCII bytes can be stored in a hat-trie and
 * read back with the value that was written. Progress and any failure are
 * reported on stderr; nothing is returned.
 */
void test_trie_non_ascii()
{
    fprintf(stderr, "checking non-ascii... \n");

    hattrie_t* T = hattrie_create();

    /* String literal, so it is only ever read through a const pointer. */
    const char* txt = "\x81\x70";
    const size_t len = strlen(txt);

    value_t* u = hattrie_get(T, txt, len);
    *u = 10;

    /*
     * Per the hat-trie API, hattrie_tryget may hand back NULL when the key is
     * absent. Treat that as a failed round trip instead of dereferencing it.
     */
    u = hattrie_tryget(T, txt, len);
    if (u == NULL || *u != 10) {
        fprintf(stderr, "can't store non-ascii strings\n");
    }

    hattrie_free(T);

    fprintf(stderr, "done.\n");
}
/*
 * Build the benchmark fixtures: allocate the xs and ds arrays, fill xs with n
 * random strings, fill ds with d pointers picked at random from xs, and create
 * the two containers T and M. Progress is printed to stderr.
 */
void setup()
{
    fprintf(stderr, "generating %zu keys ... ", n);

    xs = malloc(n * sizeof(char*));
    ds = malloc(d * sizeof(char*));

    /* One freshly allocated random string per slot of xs. */
    size_t slot = 0;
    while (slot < n) {
        size_t len = m_low + rand() % (m_high - m_low);
        xs[slot] = malloc(len + 1);
        randstr(xs[slot], len);
        ++slot;
    }

    /* ds does not own its strings; every entry points into xs. */
    for (slot = 0; slot < d; ++slot) {
        size_t pick = rand() % n;
        ds[slot] = xs[pick];
    }

    T = hattrie_create();
    M = str_map_create();

    fprintf(stderr, "done.\n");
}
/* Create a new zone tree; the result is whatever hattrie_create() returns. */
zone_tree_t* zone_tree_create()
{
    zone_tree_t* tree = hattrie_create();
    return tree;
}
int main(int argc, char *argv[]) { plan_lazy(); /* Random keys. */ srand(time(NULL)); unsigned key_count = 100000; char **keys = malloc(sizeof(char*) * key_count); for (unsigned i = 0; i < key_count; ++i) { keys[i] = str_key_rand(KEY_MAXLEN); } /* Sort random keys. */ str_key_sort(keys, key_count); /* Create trie */ value_t *val = NULL; hattrie_t *trie = hattrie_create(); ok(trie != NULL, "hattrie: create"); /* Insert keys */ bool passed = true; size_t inserted = 0; for (unsigned i = 0; i < key_count; ++i) { val = hattrie_get(trie, keys[i], strlen(keys[i]) + 1); if (!val) { passed = false; break; } if (*val == NULL) { *val = keys[i]; ++inserted; } } ok(passed, "hattrie: insert"); /* Check total insertions against trie weight. */ is_int(hattrie_weight(trie), inserted, "hattrie: trie weight matches insertions"); /* Build order-index. */ hattrie_build_index(trie); /* Lookup all keys */ passed = true; for (unsigned i = 0; i < key_count; ++i) { val = hattrie_tryget(trie, keys[i], strlen(keys[i]) + 1); if (val && (*val == keys[i] || strcmp(*val, keys[i]) == 0)) { continue; } else { diag("hattrie: mismatch on element '%u'", i); passed = false; break; } } ok(passed, "hattrie: lookup all keys"); /* Lesser or equal lookup. */ passed = true; for (unsigned i = 0; i < key_count; ++i) { if (!str_key_find_leq(trie, keys, i, key_count)) { passed = false; for (int off = -10; off < 10; ++off) { int k = (int)i + off; if (k < 0 || k >= key_count) { continue; } diag("[%u/%d]: %s%s", i, off, off == 0?">":"",keys[k]); } break; } } ok(passed, "hattrie: find lesser or equal for all keys"); /* Next lookup. 
*/ passed = true; for (unsigned i = 0; i < key_count - 1 && passed; ++i) { value_t *val; hattrie_find_next(trie, keys[i], strlen(keys[i]), &val); passed = val && *val == (void *)keys[(i + 1)]; } ok(passed, "hattrie: find next for all keys"); /* Unsorted iteration */ size_t iterated = 0; hattrie_iter_t *it = hattrie_iter_begin(trie, false); while (!hattrie_iter_finished(it)) { ++iterated; hattrie_iter_next(it); } is_int(inserted, iterated, "hattrie: unsorted iteration"); hattrie_iter_free(it); /* Sorted iteration. */ char key_buf[KEY_MAXLEN] = {'\0'}; iterated = 0; it = hattrie_iter_begin(trie, true); while (!hattrie_iter_finished(it)) { size_t cur_key_len = 0; const char *cur_key = hattrie_iter_key(it, &cur_key_len); if (iterated > 0) { /* Only if previous exists. */ if (strcmp(key_buf, cur_key) > 0) { diag("'%s' <= '%s' FAIL\n", key_buf, cur_key); break; } } ++iterated; memcpy(key_buf, cur_key, cur_key_len); hattrie_iter_next(it); } is_int(inserted, iterated, "hattrie: sorted iteration"); hattrie_iter_free(it); /* Cleanup */ for (unsigned i = 0; i < key_count; ++i) { free(keys[i]); } free(keys); hattrie_free(trie); return 0; }
int main(int argc, char* argv[]) { if (argc < 2) { fprintf(stderr, "Usage: bam-summarize reads.bam\n"); exit(EXIT_FAILURE); } samfile_t* f = samopen(argv[1], "rb", NULL); if (f == NULL) { fprintf(stderr, "can't open bam file %s\n", argv[1]); exit(1); } bam1_t* b = bam_init1(); hattrie_t* T = hattrie_create(); char* qname = NULL; size_t qname_size = 0; size_t j, n = 0; uint32_t* cigar; uint32_t cigar_op, cigar_len; read_stat_t** val; while (samread(f, b) >= 0) { if (++n % 1000000 == 0) { fprintf(stderr, "\t%zu alignments\n", n); } bool perfect = true; bool spliced = false; bool gapped = false; cigar = bam1_cigar(b); for (j = 0; j < b->core.n_cigar; ++j) { cigar_op = cigar[j] & BAM_CIGAR_MASK; cigar_len = cigar[j] >> BAM_CIGAR_SHIFT; if (cigar_op == BAM_CREF_SKIP) { if (cigar_len < min_splice_length) gapped = true; else spliced = true; } else if (cigar_op != BAM_CMATCH) perfect = false; if (cigar_op == BAM_CSOFT_CLIP || cigar_op == BAM_CHARD_CLIP) break; } /* Skip any clipped alignments. We don't want your kind! */ if (cigar_op == BAM_CSOFT_CLIP || cigar_op == BAM_CHARD_CLIP) continue; /* Hack the read to include mate information. 
*/ if (b->core.flag & BAM_FPAIRED) { if (qname_size < b->core.l_qname + 3) { qname_size = b->core.l_qname + 3; qname = realloc(qname, qname_size); } memcpy(qname, bam1_qname(b), b->core.l_qname); if (b->core.flag & BAM_FREAD1) { qname[b->core.l_qname] = '/'; qname[b->core.l_qname + 1] = '2'; qname[b->core.l_qname + 2] = '\0'; } else { qname[b->core.l_qname] = '/'; qname[b->core.l_qname + 1] = '1'; qname[b->core.l_qname + 2] = '\0'; } val = (read_stat_t**) hattrie_get(T, qname, b->core.l_qname + 2); } else { val = (read_stat_t**) hattrie_get(T, bam1_qname(b), b->core.l_qname); } if (*val == NULL) { *val = malloc(sizeof(read_stat_t)); memset(*val, 0, sizeof(read_stat_t)); } (*val)->aln_count++; if (perfect) { if (spliced) (*val)->spliced_perfect_cnt++; else (*val)->unspliced_perfect_cnt++; } if (spliced) (*val)->spliced_cnt++; if (gapped) (*val)->gapped_cnt++; } printf("alignment_count\t%zu\n", n); printf("read_count\t%zu\n", hattrie_size(T)); /* print stats from the table */ uint32_t multi_count = 0; uint32_t unspliced_perfect_cnt = 0; uint32_t spliced_perfect_cnt = 0; uint32_t spliced_cnt = 0; uint32_t gapped_cnt = 0; /* excluding multireads */ uint32_t unique_unspliced_perfect_cnt = 0; uint32_t unique_spliced_perfect_cnt = 0; uint32_t unique_spliced_cnt = 0; uint32_t unique_gapped_cnt = 0; hattrie_iter_t* i; for (i = hattrie_iter_begin(T); !hattrie_iter_finished(i); hattrie_iter_next(i)) { val = (read_stat_t**) hattrie_iter_val(i); if ((*val)->aln_count == 1) { unique_unspliced_perfect_cnt += (*val)->unspliced_perfect_cnt; unique_spliced_perfect_cnt += (*val)->spliced_perfect_cnt; unique_spliced_cnt += (*val)->spliced_cnt; unique_gapped_cnt += (*val)->gapped_cnt; } else multi_count++; unspliced_perfect_cnt += (*val)->unspliced_perfect_cnt; spliced_perfect_cnt += (*val)->spliced_perfect_cnt; spliced_cnt += (*val)->spliced_cnt; gapped_cnt += (*val)->gapped_cnt; } hattrie_iter_free(i); printf("multi_count\t%u\n", multi_count); printf("unspliced_perfect_cnt\t%u\n", 
unspliced_perfect_cnt); printf("spliced_perfect_cnt\t%u\n", spliced_perfect_cnt); printf("spliced_cnt\t%u\n", spliced_cnt); printf("gapped_cnt\t%u\n", gapped_cnt); printf("unique_unspliced_perfect_cnt\t%u\n", unique_unspliced_perfect_cnt); printf("unique_spliced_perfect_cnt\t%u\n", unique_spliced_perfect_cnt); printf("unique_spliced_cnt\t%u\n", unique_spliced_cnt); printf("unique_gapped_cnt\t%u\n", unique_gapped_cnt); /* free the table */ for (i = hattrie_iter_begin(T); !hattrie_iter_finished(i); hattrie_iter_next(i)) { free(* (read_stat_t**) hattrie_iter_val(i)); } hattrie_iter_free(i); hattrie_free(T); free(qname); bam_destroy1(b); return 0; }
/// Construct an index whose `t` member holds a newly created hat-trie.
AlnIndex::AlnIndex()
{
    this->t = hattrie_create();
}
/* Create a new zone tree; the result is whatever hattrie_create() returns. */
knot_zone_tree_t* knot_zone_tree_create()
{
    knot_zone_tree_t* tree = hattrie_create();
    return tree;
}