static void copy_check_alignment(const char *infname, const char *informat, const char *outfname, const char *outmode, const char *outref) { samFile *in = sam_open(infname, "r"); samFile *out = sam_open(outfname, outmode); bam1_t *aln = bam_init1(); bam_hdr_t *header = NULL; int res; if (!in) { fail("couldn't open %s", infname); goto err; } if (!out) { fail("couldn't open %s with mode %s", outfname, outmode); goto err; } if (!aln) { fail("bam_init1() failed"); goto err; } if (outref) { if (hts_set_opt(out, CRAM_OPT_REFERENCE, outref) < 0) { fail("setting reference %s for %s", outref, outfname); goto err; } } header = sam_hdr_read(in); if (!header) { fail("reading header from %s", infname); goto err; } if (sam_hdr_write(out, header) < 0) fail("writing headers to %s", outfname); while ((res = sam_read1(in, header, aln)) >= 0) { int mod4 = ((intptr_t) bam_get_cigar(aln)) % 4; if (mod4 != 0) fail("%s CIGAR not 4-byte aligned; offset is 4k+%d for \"%s\"", informat, mod4, bam_get_qname(aln)); if (sam_write1(out, header, aln) < 0) fail("writing to %s", outfname); } if (res < -1) { fail("failed to read alignment from %s", infname); } err: bam_destroy1(aln); bam_hdr_destroy(header); if (in) sam_close(in); if (out) sam_close(out); }
// currently, this function ONLY works if each read has one hit
//
// Streams records from `in` to `out`, filling in mate information (mtid, mpos,
// isize and the mate-related flag bits) for consecutive records that share a
// query name.  Two record slots, b[0] and b[1], are used alternately: `cur` is
// the record just read and `pre` the one read before it.
//
// Records with tid < 0 and records flagged BAM_FSECONDARY are skipped with
// `continue` before any write, so they never reach `out`; because `curr` is
// not flipped on those paths the next read overwrites the same slot.
void bam_mating_core(bamFile in, bamFile out)
{
	bam_header_t *header;
	bam1_t *b[2];
	int curr, has_prev, pre_end = 0, cur_end;
	kstring_t str;

	str.l = str.m = 0; str.s = 0;
	header = bam_header_read(in);
	bam_header_write(out, header);

	b[0] = bam_init1();
	b[1] = bam_init1();
	curr = 0; has_prev = 0;
	while (bam_read1(in, b[curr]) >= 0) {
		bam1_t *cur = b[curr], *pre = b[1-curr];
		if (cur->core.tid < 0) continue;
		cur_end = bam_calend(&cur->core, bam1_cigar(cur));
		// an alignment running past the end of its reference is marked unmapped
		if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
		if (cur->core.flag & BAM_FSECONDARY) continue; // skip secondary alignments
		if (has_prev) {
			if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name
				// cross-link the two records as mates of each other
				cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos;
				pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos;
				if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
					&& !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // set TLEN/ISIZE
				{
					// 5' coordinate: alignment end for reverse-strand reads, start otherwise
					uint32_t cur5, pre5;
					cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
					pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
					cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
				} else cur->core.isize = pre->core.isize = 0;
				// mirror each record's strand into its mate's "mate reverse" bit
				if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE;
				else cur->core.flag &= ~BAM_FMREVERSE;
				if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE;
				else pre->core.flag &= ~BAM_FMREVERSE;
				// an unmapped read marks its mate "mate unmapped" and clears proper-pair there
				if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; }
				if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; }
				bam_template_cigar(pre, cur, &str);
				bam_write1(out, pre);
				bam_write1(out, cur);
				has_prev = 0; // both slots have been written; start over with the next read
			} else { // unpaired or singleton
				pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
				if (pre->core.flag & BAM_FPAIRED) {
					pre->core.flag |= BAM_FMUNMAP;
					pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR;
				}
				bam_write1(out, pre);
				// has_prev stays set: `cur` becomes the pending record for the next iteration
			}
		} else has_prev = 1;
		curr = 1 - curr; // swap slots so the next read does not overwrite `cur`
		pre_end = cur_end;
	}
	// flush the last pending record; its mate fields are written as they were read
	if (has_prev) bam_write1(out, b[1-curr]);
	bam_header_destroy(header);
	bam_destroy1(b[0]);
	bam_destroy1(b[1]);
	free(str.s);
}
/**
 * Builds an iterator over read pairs from an already opened file and its header.
 *
 * Two record buffers are allocated with bam_init1() and wrapped by
 * utils::make_shared_sam(); the first pair is then fetched eagerly via
 * fetch_next_pair() and stored in m_sam_records.
 *
 * NOTE(review): C++ initializes members in the order they are declared in the
 * class, not in the order written below. fetch_next_pair() presumably uses the
 * two record buffers, so confirm the class declares them before m_sam_records.
 */
SamPairIterator::SamPairIterator(const std::shared_ptr<htsFile>& sam_file_ptr, const std::shared_ptr<bam_hdr_t>& sam_header_ptr) :
  m_sam_file_ptr {sam_file_ptr},
  m_sam_header_ptr {sam_header_ptr},
  m_sam_record_ptr1 {utils::make_shared_sam(bam_init1())}, ///< first record buffer, allocated once here so it can be reused across the iteration
  m_sam_record_ptr2 {utils::make_shared_sam(bam_init1())}, ///< second record buffer, allocated once here so it can be reused across the iteration
  m_sam_records {fetch_next_pair()}                        ///< must be initialized after the record buffers, because fetch_next_pair() runs here
{}
/*
 * Re-reads the "<OUTPUT_PREFIX>_splitread.bam" and "<OUTPUT_PREFIX>_lowqual.bam"
 * outputs (when the corresponding WRITE_* flag is set) two records at a time
 * and asserts that every pair was classified consistently:
 *   - split-read file: every pair must satisfy b2g_bam_pair_split();
 *   - low-quality file: no pair may be high quality; when split reads are
 *     written separately the pair must not be a split pair either, otherwise
 *     a split pair is an accepted reason for landing in this file.
 *
 * All checks are assert()s, so they disappear when NDEBUG is defined; the
 * reads themselves are kept outside the assertions so the files are still
 * consumed identically.
 */
static void _check_quality(char *OUTPUT_PREFIX, int WRITE_LOWQ, int WRITE_SPLITREAD, int MAPPING_QUALITY, int MIN_ALIGNED_PCT, int IGNORE_DUPLICATES)
{
	bam1_t *b1 = bam_init1(), *b2 = bam_init1();
	int mate_ret;

	if (WRITE_SPLITREAD) {
		samfile_t *split_file = b2g_samfile_open("%s_splitread.bam", "rb", 0, OUTPUT_PREFIX);
		/* Fail the check clearly instead of crashing inside samread(). */
		assert(split_file);
		while (-1 < samread(split_file, b1)) {
			/* A missing mate would leave b2 holding the previous pair's record. */
			mate_ret = samread(split_file, b2);
			assert(-1 < mate_ret);
			(void)mate_ret;
			assert(b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
		}
		samclose(split_file);
	}

	if (WRITE_LOWQ) {
		samfile_t *lowq_file = b2g_samfile_open("%s_lowqual.bam", "rb", 0, OUTPUT_PREFIX);
		assert(lowq_file);
		while (-1 < samread(lowq_file, b1)) {
			mate_ret = samread(lowq_file, b2);
			assert(-1 < mate_ret);
			(void)mate_ret;
			if (WRITE_SPLITREAD) {
				assert(!b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
				assert(!b2g_bams_highq(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
			}
			else
				assert(!b2g_bams_highq(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES) || b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
		}
		samclose(lowq_file);
	}

	bam_destroy1(b1);
	bam_destroy1(b2);
}
int famstats_sum_main(int argc, char *argv[]) { if(argc < 2) return sum_usage(argv); if(strcmp(argv[1], "--help") == 0) { return sum_usage(argv, EXIT_SUCCESS); } int c; FILE *ofp(stdout); while((c = getopt(argc, argv, "o:h?")) > -1) { switch(c) { case 'o': ofp = fopen(optarg, "w"); break; case 'h': case '?': return sum_usage(argv, EXIT_SUCCESS); } } fputs("Filename: count\n", ofp); for(int i(1); i < argc; ++i) { dlib::check_bam_tag_exit(argv[i], "FM"); dlib::BamHandle in(argv[i]); bam1_t *b(bam_init1()); size_t count(0); while(sam_read1(in.fp, in.header, b) >= 0) if((b->core.flag & (BAM_FSECONDARY | BAM_FSUPPLEMENTARY | BAM_FREAD2)) == 0) count += bam_itag(b,"FM"); fprintf(ofp, "%s: %lu\n", argv[i], count); bam_destroy1(b); } fclose(ofp); return EXIT_SUCCESS; }
/**
 * Creates a brand-new (novel) alignment record from textual fields.
 *
 * @param qname    read name
 * @param gseq_tid target id; a negative value makes the record unmapped
 * @param pos      1-based position; a value <= 0 makes the record unmapped
 * @param reverse  when true the reverse-strand flag is set
 * @param qseq     read sequence
 * @param cigar    CIGAR text, handed to set_cigar()
 * @param quals    base qualities, handed to add_quals() (expected as Phred33)
 */
GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid, int pos, bool reverse,
                       const char* qseq, const char* cigar, const char* quals)
    : iflags(0), exons(1), clipL(0), clipR(0), mapped_len(0) {
  novel = true;
  bam_header = NULL;
  b = bam_init1();

  const bool no_placement = (pos <= 0 || gseq_tid < 0);
  if (no_placement) {
    // No usable coordinates: flag as unmapped and clear position and target.
    b->core.flag |= BAM_FUNMAP;
    b->core.pos = -1;
    gseq_tid = -1;
  } else {
    // The caller passes 1-based coordinates; BAM stores 0-based ones.
    b->core.pos = pos - 1;
  }
  b->core.tid = gseq_tid;
  b->core.qual = 255;
  b->core.mtid = -1;
  b->core.mpos = -1;

  int l_qseq = strlen(qseq);

  // The stored name length counts the terminating NUL, which is copied too.
  b->core.l_qname = strlen(qname) + 1;
  memcpy(realloc_bdata(b, b->core.l_qname), qname, b->core.l_qname);

  // The remaining variable-length fields are appended in this fixed order.
  set_cigar(cigar);
  add_sequence(qseq, l_qseq);
  add_quals(quals);

  if (reverse) {
    b->core.flag |= BAM_FREVERSE;
  }
}
// remember to clean up with bam_destroy1(b); bam1_t* alignment_to_bam(const string& sam_header, const Alignment& alignment, const string& refseq, const int32_t refpos, const string& cigar, const string& mateseq, const int32_t matepos, const int32_t tlen) { assert(!sam_header.empty()); string sam_file = "data:" + sam_header + alignment_to_sam(alignment, refseq, refpos, cigar, mateseq, matepos, tlen); const char* sam = sam_file.c_str(); samFile *in = sam_open(sam, "r"); bam_hdr_t *header = sam_hdr_read(in); bam1_t *aln = bam_init1(); if (sam_read1(in, header, aln) >= 0) { bam_hdr_destroy(header); sam_close(in); // clean up return aln; } else { cerr << "[vg::alignment] Failure to parse SAM record" << endl << sam << endl; exit(1); } }
/**
 * Opens every BAM in bam_fnames and primes the merge queue with the first
 * record of each file.
 *
 * When file_offsets has exactly one entry per file name, a positive entry is
 * used as the position to seek to in the corresponding file before reading.
 * Files that yield no record are closed again and left out of the merge.
 *
 * Exits the process with status 1 if a file cannot be opened or if no file
 * provided a record. An empty bam_fnames leaves the object empty.
 */
BamMerge::BamMerge(const vector<string>& bam_fnames,
                   vector<int64_t> file_offsets) :
    _bam_fnames(bam_fnames),
    _lines(less_bam(true)),
    _last_id(0)
{
  if (bam_fnames.size() <= 0)
    return;

  for (size_t i = 0; i < _bam_fnames.size(); ++i) {
    const char* fname = _bam_fnames[i].c_str();
    samfile_t* fp = samopen(fname, "rb", 0);
    if (fp==0) {
      warn_msg(ERR_BAM_OPEN, fname);
      exit(1);
    }

    if (bam_fnames.size() == file_offsets.size() &&
        file_offsets[i] > 0)
      bgzf_seek(fp->x.bam, file_offsets[i], SEEK_SET);

    bam1_t* b = bam_init1();
    if (samread(fp, b) > 0) {
      // The queue size at this point is used as the record's file index.
      _src_files.push_back(fp);
      CBamLine brec(_lines.size(), b, fp->header);
      _lines.push(brec);
    }
    else {
      // Nothing to merge from this file: release the record and the handle,
      // which is not tracked in _src_files and would otherwise leak.
      bam_destroy1(b);
      samclose(fp);
    }
  }

  if (_lines.size() == 0) {
    warn_msg("Warning: no input BAM records found.\n");
    exit(1);
  }
}
int main(int argc, char **argv) { dlib::BamHandle in = dlib::BamHandle("bed_test.bam"); dlib::ParsedBed bed = dlib::ParsedBed("bed_test.bed", in.header); bam1_t *b = bam_init1(); size_t diffs = 0; void *lh3bed = bed_read("bed_test.bed"); samFile *so = sam_open("disagreed.bam", "wb9"); sam_hdr_write(so, in.header); size_t disagrees = 0, agrees = 0; int dbr = 0, lh3r = 0; while(in.read(b) != -1) { if(b->core.flag & (BAM_FUNMAP)) continue; if((dbr = bed.bam1_test(b)) != (lh3r = bed_overlap(lh3bed, in.header->target_name[b->core.tid], b->core.pos, bam_endpos(b)))) { LOG_EXIT("dbr: %i. lh3r: %i. Contig: %s. Position: %i. endpos; %i\n", dbr, lh3r, in.header->target_name[b->core.tid], b->core.pos, bam_endpos(b)); if(++disagrees % 100 == 0) LOG_DEBUG("disagrees: %lu.\n", disagrees); sam_write1(so, in.header, b); } else { if(++agrees % 500000 == 0) LOG_DEBUG("agrees: %lu.\n", agrees); } } sam_close(so); bam_destroy1(b); bed_destroy(lh3bed); return EXIT_SUCCESS; }
/*
 * Converts a name-collated BAM stream into FASTQ.
 *
 * Every record read from state->fp that is not secondary/supplementary and
 * passes the flag_on / flag_off filters is formatted by bam1_to_fq() and
 * written to the output stream selected by which_readpart().
 *
 * Returns true on success.  Returns false if the record buffer cannot be
 * allocated, if a record cannot be converted, or if reading stops on an
 * error rather than at end of file.  The buffers are released on every path.
 */
static bool bam2fq_mainloop(bam2fq_state_t *state)
{
    // process a name collated BAM into fastq
    bam1_t* b = bam_init1();
    if (b == NULL) {
        perror(NULL);
        return false;
    }
    int64_t n_reads = 0; // Statistics
    kstring_t linebuf = { 0, 0, NULL }; // Buffer
    bool ok = true;
    int ret;

    while ((ret = sam_read1(state->fp, state->h, b)) >= 0) {
        if (b->core.flag&(BAM_FSECONDARY|BAM_FSUPPLEMENTARY) // skip secondary and supplementary alignments
            || (b->core.flag&(state->flag_on)) != state->flag_on // or reads indicated by filter flags
            || (b->core.flag&(state->flag_off)) != 0) continue;
        ++n_reads;

        if (!bam1_to_fq(b, &linebuf, state)) {
            ok = false; // fall through to the shared cleanup instead of leaking
            break;
        }
        fputs(linebuf.s, state->fpr[which_readpart(b)]);
    }
    // sam_read1() returns -1 at end of file; lower values mean the input is bad.
    if (ok && ret < -1) {
        fprintf(stderr, "[E::%s] error reading input; the file may be truncated\n", __func__);
        ok = false;
    }

    free(linebuf.s);
    bam_destroy1(b);

    if (ok)
        fprintf(stderr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
    return ok;
}
/**
 * Opens an indexed BAM file for reading, optionally restricted to intervals.
 *
 * @param bam_file  path of the BAM file; must end in ".bam" (case-insensitive)
 * @param intervals regions of interest; when non-empty, random access through
 *                  the index is enabled
 *
 * Exits with status 1 if the name is not a BAM name or the file or its header
 * cannot be read, and aborts if the index cannot be loaded.
 */
BAMOrderedReader::BAMOrderedReader(std::string bam_file, std::vector<GenomeInterval>& intervals)
:bam_file(bam_file), intervals(intervals), sam(0), hdr(0), idx(0), itr(0)
{
    const char* fname = bam_file.c_str();
    size_t len = strlen(fname);
    // Guard the length first: fname+len-4 points before the string for names
    // shorter than the extension itself.
    if ( len < 4 || strcasecmp(".bam",fname+len-4) )
    {
        fprintf(stderr, "[%s:%d %s] Not a BAM file: %s\n", __FILE__, __LINE__, __FUNCTION__, bam_file.c_str());
        exit(1);
    }

    sam = sam_open(bam_file.c_str(), "r");
    if (sam==0)
    {
        fprintf(stderr, "[%s:%d %s] Cannot open %s\n", __FILE__, __LINE__, __FUNCTION__, bam_file.c_str());
        exit(1);
    }

    hdr = sam_hdr_read(sam);
    if (hdr==0)
    {
        fprintf(stderr, "[%s:%d %s] Cannot read header of %s\n", __FILE__, __LINE__, __FUNCTION__, bam_file.c_str());
        exit(1);
    }

    s = bam_init1();

    idx = bam_index_load(bam_file.c_str());
    if (idx==0)
    {
        fprintf(stderr, "[%s:%d %s] fail to load index for %s\n", __FILE__, __LINE__, __FUNCTION__, bam_file.c_str());
        abort();
    }
    else
    {
        index_loaded = true;
    }

    str = {0,0,0};

    intervals_present = intervals.size()!=0;
    interval_index = 0;

    random_access_enabled = intervals_present && index_loaded;
}
hash_table* hash_ids(const char* fn) { fprintf(stderr, "hashing ... \n"); hash_table* T = create_hash_table(); samfile_t* f = samopen(fn, "rb", NULL); if (f == NULL) { fprintf(stderr, "can't open bam file %s\n", fn); exit(1); } bam1_t* b = bam_init1(); uint32_t n = 0; while (samread(f, b) >= 0) { if (++n % 1000000 == 0) { fprintf(stderr, "\t%d reads\n", n); } inc_hash_table(T, bam1_qname(b), b->core.l_qname); } bam_destroy1(b); samclose(f); fprintf(stderr, "done.\n"); return T; }
struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, bam_hdr_t *header, int count, struct lm *lm)
/* Read up to count further alignments from fh and return them as a list of
 * samAlignment, with storage taken from lm.  Fewer than count are returned
 * when reading stops early (for instance at end of file). */
{
/* Helper that bamAddOneSamAlignment accumulates results into. */
struct bamToSamHelper helper;
helper.lm = lm;
helper.chrom = NULL;
helper.dy = dyStringNew(0);
helper.samFile = fh;
helper.samList = NULL;

/* One record buffer is reused for every read. */
bam1_t *rec = bam_init1();
int remaining = count;
while (remaining > 0 && sam_read1(fh, header, rec) >= 0)
    {
    bamAddOneSamAlignment(rec, &helper, header);
    --remaining;
    }
bam_destroy1(rec);

/* Release the scratch string; the list is reversed before it is returned. */
dyStringFree(&helper.dy);
slReverse(&helper.samList);
return helper.samList;
}
/*
 * Writes the output header, then streams every record from the input file to
 * the output file after passing it through state->mode_func.
 *
 * Returns true when the input was consumed up to a clean end of file, and
 * false (after printing an error) if the header or a record cannot be
 * written, or if reading stops with anything other than -1.
 */
static bool readgroupise(state_t* state)
{
    if (sam_hdr_write(state->output_file, state->output_header) != 0) {
        print_error_errno("addreplacerg", "[%s] Could not write header to output file", __func__);
        return false;
    }

    bam1_t* rec = bam_init1();
    int status;
    for (;;) {
        status = sam_read1(state->input_file, state->input_header, rec);
        if (status < 0) break;

        state->mode_func(state, rec);

        if (sam_write1(state->output_file, state->output_header, rec) >= 0)
            continue;

        print_error_errno("addreplacerg", "[%s] Could not write read to output file", __func__);
        bam_destroy1(rec);
        return false;
    }
    bam_destroy1(rec);

    /* -1 is the only status that denotes a normal end of input. */
    if (status == -1)
        return true;

    print_error_errno("addreplacerg", "[%s] Error reading from input file", __func__);
    return false;
}
static int aux_fields1(void) { static const char sam[] = "data:" "@SQ\tSN:one\tLN:1000\n" "@SQ\tSN:two\tLN:500\n" "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:" xstr(PI) "\tXd:d:" xstr(E) "\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,+2\tZZ:i:1000000\n"; // Canonical form of the alignment record above, as output by sam_format1() static const char r1[] = "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:3.14159\tXd:d:2.71828\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,2\tZZ:i:1000000"; samFile *in = sam_open(sam, "r"); bam_hdr_t *header = sam_hdr_read(in); bam1_t *aln = bam_init1(); uint8_t *p; uint32_t n; kstring_t ks = { 0, 0, NULL }; if (sam_read1(in, header, aln) >= 0) { if ((p = check_bam_aux_get(aln, "XA", 'A')) && bam_aux2A(p) != 'k') fail("XA field is '%c', expected 'k'", bam_aux2A(p)); if ((p = check_bam_aux_get(aln, "Xi", 'C')) && bam_aux2i(p) != 37) fail("Xi field is %d, expected 37", bam_aux2i(p)); if ((p = check_bam_aux_get(aln, "Xf", 'f')) && fabs(bam_aux2f(p) - PI) > 1E-6) fail("Xf field is %.12f, expected pi", bam_aux2f(p)); if ((p = check_bam_aux_get(aln, "Xd", 'd')) && fabs(bam_aux2f(p) - E) > 1E-6) fail("Xf field is %.12f, expected e", bam_aux2f(p)); if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), HELLO) != 0) fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), HELLO); if ((p = check_bam_aux_get(aln, "XH", 'H')) && strcmp(bam_aux2Z(p), BEEF) != 0) fail("XH field is \"%s\", expected \"%s\"", bam_aux2Z(p), BEEF); // TODO Invent and use bam_aux2B() if ((p = check_bam_aux_get(aln, "XB", 'B')) && ! 
(memcmp(p, "Bc", 2) == 0 && (memcpy(&n, p+2, 4), n) == 3 && memcmp(p+6, "\xfe\x00\x02", 3) == 0)) fail("XB field is %c,..., expected c,-2,0,+2", p[1]); if ((p = check_bam_aux_get(aln, "ZZ", 'I')) && bam_aux2i(p) != 1000000) fail("ZZ field is %d, expected 1000000", bam_aux2i(p)); if (sam_format1(header, aln, &ks) < 0) fail("can't format record"); if (strcmp(ks.s, r1) != 0) fail("record formatted incorrectly: \"%s\"", ks.s); free(ks.s); } else fail("can't read record"); bam_destroy1(aln); bam_hdr_destroy(header); sam_close(in); return 1; }
/*
 * Refills buf with records read from `in` and returns the number of elements
 * it then holds (buf->n).
 *
 * Steps, in order:
 *   1. drop the leading elements whose record pointer is NULL;
 *   2. find the smallest non-negative rpos among the remaining elements;
 *   3. read records, appending a copy of each, until the element count
 *      reaches `capacity` and the stop condition at the bottom of the loop
 *      holds, or the input is exhausted.
 *
 * buf->x is set to the index of the first element whose tid differs from
 * last_tid, or to buf->n when reading stopped early without such a change;
 * it stays -1 when the input ended first.
 */
static int fill_buf(samfile_t *in, buffer_t *buf)
{
	int i, ret, last_tid, min_rpos = 0x7fffffff, capacity;
	bam1_t *b = bam_init1();
	bam1_core_t *c = &b->core; // alias of b->core, refreshed by every samread()
	// squeeze out the empty cells at the beginning
	for (i = 0; i < buf->n; ++i)
		if (buf->buf[i].b) break;
	if (i < buf->n) { // squeeze
		if (i > 0) {
			memmove(buf->buf, buf->buf + i, sizeof(elem_t) * (buf->n - i));
			buf->n = buf->n - i;
		}
	} else buf->n = 0; // every cell was empty
	// calculate min_rpos
	for (i = 0; i < buf->n; ++i) {
		elem_t *e = buf->buf + i;
		if (e->b && e->rpos >= 0 && e->rpos < min_rpos)
			min_rpos = buf->buf[i].rpos;
	}
	// fill the buffer
	buf->x = -1;
	last_tid = buf->n? buf->buf[0].b->core.tid : -1;
	capacity = buf->n + BLOCK_SIZE;
	while ((ret = samread(in, b)) >= 0) {
		elem_t *e;
		uint8_t *qual = bam1_qual(b);
		int is_mapped;
		if (last_tid < 0) last_tid = c->tid;
		if (c->tid != last_tid) {
			// remember where the first record with a different tid starts
			if (buf->x < 0) buf->x = buf->n;
		}
		if (buf->n >= buf->max) { // enlarge
			buf->max = buf->max? buf->max<<1 : 8;
			// NOTE(review): the realloc() result is not checked for NULL
			buf->buf = (elem_t*)realloc(buf->buf, sizeof(elem_t) * buf->max);
		}
		e = &buf->buf[buf->n++];
		e->b = bam_dup1(b); // the element keeps its own copy; b is reused
		e->rpos = -1; e->score = 0;
		// score: sum of (quality + 1) over all bases, divided by sqrt(length + 1)
		for (i = 0; i < c->l_qseq; ++i) e->score += qual[i] + 1;
		e->score = (double)e->score / sqrt(c->l_qseq + 1);
		is_mapped = (c->tid < 0 || c->tid >= in->header->n_targets || (c->flag&BAM_FUNMAP))? 0 : 1;
		if (!is_mapped) e->score = -1;
		if (is_mapped && (c->flag & BAM_FREVERSE)) {
			// NOTE(review): pos is added to the value returned by bam_calend() — confirm this is the intended coordinate
			e->rpos = b->core.pos + bam_calend(&b->core, bam1_cigar(b));
			if (min_rpos > e->rpos) min_rpos = e->rpos;
		}
		if (buf->n >= capacity) {
			// keep reading while mapped records still start at or before min_rpos
			if (is_mapped && c->pos <= min_rpos) capacity += BLOCK_SIZE;
			else break;
		}
	}
	// the loop was left through `break` (ret >= 0) without seeing a tid change
	if (ret >= 0 && buf->x < 0) buf->x = buf->n;
	bam_destroy1(b);
	return buf->n;
}
/**
 * Default constructor: allocates the record buffer, clears the file handles
 * and file-name list, and sets the output path to "-" with mode "w".
 */
SamCtrl::SamCtrl()
{
    // Record buffer, allocated once here.
    m_b = bam_init1();

    // Nothing is open yet.
    m_in = 0;
    m_out = 0;
    m_fn_list = 0;

    // Default output settings.
    m_out_mode = "w";
    m_out_path = "-";
}
/*
 * Calls func(record, data) for every record the index iterator yields for
 * the region tid:beg-end of fp.
 *
 * Returns 0 when iteration finished with status -1, otherwise the negative
 * status that ended the iteration.
 */
int samfetch(samfile_t *fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
{
	bam1_t *rec = bam_init1();
	hts_itr_t *region_it = sam_itr_queryi(idx, tid, beg, end);
	int status;

	for (;;) {
		status = sam_itr_next(fp->file, region_it, rec);
		if (status < 0) break;
		func(rec, data);
	}

	hts_itr_destroy(region_it);
	bam_destroy1(rec);

	if (status == -1)
		return 0;
	return status;
}
/*
 * Returns the number of records that can be read from the BAM file at path,
 * or 0 if the file cannot be opened.
 */
static int _count_reads(char *path)
{
	samfile_t *fp = b2g_samfile_open(path, "rb", 0);
	if (!fp)
		return 0;

	int n_records = 0;
	bam1_t *rec = bam_init1();
	/* Count until samread() reports a value below zero. */
	for (; samread(fp, rec) > -1; ++n_records)
		;

	bam_destroy1(rec);
	samclose(fp);
	return n_records;
}
/**
 * Reads the next record from the opened file into a newly allocated bam1_t.
 *
 * The record is returned even when samread() reports -1; in that case
 * end_of_file is set first. The function never frees the record.
 *
 * @throws SAM_IO_Error (file_not_opened) when no file has been opened.
 */
bam1_t * SAM_istream::read() throw (SAM_IO_Error) {
	if (sam_file == NULL) {
		throw SAM_IO_Error(SAM_IO_Error::file_not_opened,
				"tried to read from a not previously opened file");
	}

	bam1_t * record = bam_init1();
	const int status = samread(sam_file, record);
	if (status == -1) {
		end_of_file = true;
	}
	return record;
}
/*
 * Runs func(record, in, out, data) on records of the BAM file ifn.
 *
 *   ifn  input BAM path
 *   ofn  optional output BAM path (NULL for none); opened with ifn's header
 *   reg  optional region string; when given, only that region is visited
 *        through the index, otherwise the whole file is read
 *
 * Any failure (unopenable file, missing index, unknown reference name, or a
 * read that ends with a status other than -1) prints a message and exits
 * with status 1.  On success the final read status is returned.
 */
int sam_fetch(char *ifn, char *ofn, char *reg, void *data, sam_fetch_f func)
{
	int ret = 0;
	samfile_t *in = samopen(ifn, "rb", 0);
	samfile_t *out = 0;

	/* in->header is needed right away, so a failed open must stop here. */
	if (in == 0) {
		fprintf(stderr, "[%s:%d] Failed to open input file \"%s\".\n", __func__, __LINE__, ifn);
		exit(1);
	}
	if (ofn) {
		out = samopen(ofn, "wb", in->header);
		if (out == 0) {
			fprintf(stderr, "[%s:%d] Failed to open output file \"%s\".\n", __func__, __LINE__, ofn);
			exit(1);
		}
	}

	if (reg) {
		bam_index_t *idx = bam_index_load(ifn);
		if (idx == 0) {
			fprintf(stderr, "[%s:%d] Random alignment retrieval only works for indexed BAM files.\n",
					__func__, __LINE__);
			exit(1);
		}

		int tid, beg, end;
		bam_parse_region(in->header, reg, &tid, &beg, &end);
		if (tid < 0) {
			fprintf(stderr, "[%s:%d] Region \"%s\" specifies an unknown reference name. \n",
					__func__, __LINE__, reg);
			exit(1);
		}

		bam_iter_t iter;
		bam1_t *b = bam_init1();
		iter = bam_iter_query(idx, tid, beg, end);
		while ((ret = bam_iter_read(in->x.bam, iter, b)) >= 0) func(b, in, out, data);
		bam_iter_destroy(iter);
		bam_destroy1(b);
		bam_index_destroy(idx);
	} else {
		bam1_t *b = bam_init1();
		while ((ret = samread(in, b)) >= 0) func(b, in, out, data);
		bam_destroy1(b);
	}

	if (out) samclose(out);
	samclose(in);

	if (ret != -1) {					/* truncated is -2 */
		fprintf(stderr, "[%s:%d] Alignment retrieval failed due to truncated file\n",
				__func__, __LINE__);
		exit(1);
	}

	return ret;
}
/*
 * Reads up to n_needed reads from the BAM stream in bs and converts them to
 * bwa_seq_t.
 *
 * bs->which selects the reads to keep: bit 1 = read 1, bit 2 = read 2,
 * bit 4 = reads carrying neither flag.  *n receives the number of reads
 * stored.  Returns the array, or 0 when no read was stored.  A read status
 * below -1 is fatal.
 */
static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual)
{
	bwa_seq_t *seqs, *p;
	int n_seqs = 0, l, i, res;
	long n_trimmed = 0, n_tot = 0;
	bam1_t *b = bam_init1();

	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	while ((res = bam_read1(bs->fp, b)) >= 0) {
		uint8_t *s, *q;
		const int is_read1 = (b->core.flag & BAM_FREAD1) != 0;
		const int is_read2 = (b->core.flag & BAM_FREAD2) != 0;
		const int wanted = ((bs->which & 1) && is_read1)
			|| ((bs->which & 2) && is_read2)
			|| ((bs->which & 4) && !is_read1 && !is_read2);
		if (!wanted) continue;

		l = b->core.l_qseq;
		p = &seqs[n_seqs++];
		p->tid = -1; // not assigned to a thread yet
		p->qual = 0;
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;

		s = bam1_seq(b);
		q = bam1_qual(b);
		p->seq = (ubyte_t*)calloc(p->len + 1, 1);
		p->qual = (ubyte_t*)calloc(p->len + 1, 1);
		for (i = 0; i != p->full_len; ++i) {
			p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)];
			// qualities are stored with an offset of 33, capped at 126
			p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126;
		}
		if (bam1_strand(b)) {
			// reverse-strand record: reverse both the bases and the qualities
			seq_reverse(p->len, p->seq, 1);
			seq_reverse(p->len, p->qual, 0);
		}
		if (trim_qual >= 1)
			n_trimmed += bwa_trim_read(trim_qual, p);

		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
		p->name = strdup((const char*)bam1_qname(b));

		if (n_seqs == n_needed) break;
	}
	if (res < 0 && res != -1)
		err_fatal_simple("Error reading bam file");

	*n = n_seqs;
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);

	bam_destroy1(b);
	if (n_seqs == 0) {
		free(seqs);
		return 0;
	}
	return seqs;
}
/*
 * Copies records from `in` to `out`, rewriting each record's CIGAR and
 * position via unpad_seq(), write_cigar() and replace_cigar().
 *
 * A record at position 0 whose name equals the name of its target sequence is
 * treated as the reference: its unpadded form is kept in `r`, and `posmap` is
 * rebuilt from it.  Every other record is compared against the most recent
 * reference to derive a new CIGAR, and its position is translated through
 * `posmap`.
 *
 * Always returns 0.
 *
 * NOTE(review): cigar2 is not freed in this function, and the realloc()
 * result is not checked.
 */
int bam_pad2unpad(bamFile in, bamFile out)
{
	bam_header_t *h;
	bam1_t *b;
	kstring_t r, q;          // r: current reference, q: current read (both filled by unpad_seq)
	uint32_t *cigar2 = 0;    // scratch buffer for the rewritten CIGAR
	int n2 = 0, m2 = 0, *posmap = 0;

	h = bam_header_read(in);
	bam_header_write(out, h);
	b = bam_init1();
	r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
	while (bam_read1(in, b) >= 0) {
		uint32_t *cigar = bam1_cigar(b);
		n2 = 0; // start a fresh CIGAR for this record
		if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam1_qname(b), h->target_name[b->core.tid]) == 0) {
			// reference record: a single match operation covering the whole sequence
			int i, k;
			unpad_seq(b, &r);
			write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH));
			replace_cigar(b, n2, cigar2);
			posmap = realloc(posmap, r.m * sizeof(int));
			// posmap[i] = number of non-zero entries of r.s before column i
			for (i = k = 0; i < r.l; ++i) {
				posmap[i] = k; // note that a read should NOT start at a padding
				if (r.s[i]) ++k;
			}
		} else {
			int i, k, op;
			unpad_seq(b, &q);
			// a leading soft clip is carried over unchanged
			if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[0]);
			// classify every column from whether the read and the reference hold a value there
			for (i = 0, k = b->core.pos; i < q.l; ++i, ++k)
				q.s[i] = q.s[i]? (r.s[k]? BAM_CMATCH : BAM_CINS) : (r.s[k]? BAM_CDEL : BAM_CPAD);
			// run-length encode the per-column operations
			for (i = k = 1, op = q.s[0]; i < q.l; ++i) {
				if (op != q.s[i]) {
					write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
					op = q.s[i]; k = 1;
				} else ++k;
			}
			write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
			// a trailing soft clip is carried over unchanged
			if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]);
			// a match-pad-match run collapses into the last element; the other two are zeroed
			for (i = 2; i < n2; ++i)
				if (bam_cigar_op(cigar2[i]) == BAM_CMATCH && bam_cigar_op(cigar2[i-1]) == BAM_CPAD && bam_cigar_op(cigar2[i-2]) == BAM_CMATCH)
					cigar2[i] += cigar2[i-2], cigar2[i-2] = cigar2[i-1] = 0;
			// compact away the zeroed elements
			for (i = k = 0; i < n2; ++i)
				if (cigar2[i]) cigar2[k++] = cigar2[i];
			n2 = k;
			replace_cigar(b, n2, cigar2);
			b->core.pos = posmap[b->core.pos];
		}
		bam_write1(out, b);
	}
	free(r.s); free(q.s); free(posmap);
	bam_destroy1(b);
	bam_header_destroy(h);
	return 0;
}
hash_table* hash_ids(const char* fn) { fprintf(stderr, "hashing ... \n"); hash_table* T = create_hash_table(); samfile_t* f = samopen(fn, "rb", NULL); if (f == NULL) { fprintf(stderr, "can't open bam file %s\n", fn); exit(1); } bam1_t* b = bam_init1(); uint32_t n = 0; char* qname = NULL; size_t qname_size = 0; while (samread(f, b) >= 0) { if (++n % 1000000 == 0) { fprintf(stderr, "\t%d reads\n", n); } if (qname_size < b->core.l_qname + 3) { qname_size = b->core.l_qname + 3; qname = realloc(qname, qname_size); } memcpy(qname, bam1_qname(b), b->core.l_qname); if (b->core.flag & BAM_FREAD2) { qname[b->core.l_qname] = '/'; qname[b->core.l_qname + 1] = '2'; qname[b->core.l_qname + 2] = '\0'; } else { qname[b->core.l_qname] = '/'; qname[b->core.l_qname + 1] = '1'; qname[b->core.l_qname + 2] = '\0'; } inc_hash_table(T, qname, b->core.l_qname + 2); } free(qname); bam_destroy1(b); samclose(f); fprintf(stderr, "done.\n"); return T; }
void test_cigar_to_spans() { char *sam_filename = "testdata/RUM.sam"; samfile_t *samfile = samopen(sam_filename, "r", NULL); bam1_t *rec = bam_init1(); struct SpanAssertion { int read_num; int num_spans; struct Span spans[10]; }; struct SpanAssertion cases[] = { { 102, 1, { { 12465667, 12465724 } } }, { 104, 1, { { 2095233, 2095289 } } }, { 128, 1, { { 152316, 152373 } } }, { 162, 1, { { 14232813, 14232886 } } }, { 172, 2, { { 3619619, 3619627 }, { 3619984, 3620048 } } }, { 642, 1, { { 15291546, 15291622 } } }, { 670, 2, { { 3950665, 3950724 }, { 3951436, 3951453 } } } }; int num_cases = sizeof(cases) / sizeof(struct SpanAssertion); int read_num = 0; int case_num = 0; CigarCursor curs; while (case_num < num_cases && samread(samfile, rec) > 0) { if (cases[case_num].read_num == read_num) { int num_spans = cases[case_num].num_spans; Span *span; init_cigar_cursor(&curs, rec); for (span = cases[case_num].spans; span < cases[case_num].spans + num_spans; span++) { assert_equals(1, next_span(&curs), "Should have found a span"); assert_equals(span->start, curs.start, "Start"); assert_equals(span->end, curs.end, "End"); } assert_equals(0, next_span(&curs), "No more spans"); case_num++; } read_num++; } if (case_num < num_cases) assert_equals(0, 1, "Ran out of records in sam file"); }
/*
 * Checks next_fragment() on a file of single-end reads: the first two calls
 * must each return one read ("seq.1", then "seq.2") and the third call must
 * return none.
 */
void test_next_fragment_single() {
  char *sam_filename = "testdata/test_next_fragment_single.sam";
  samfile_t *samfile = samopen(sam_filename, "r", NULL);
  bam1_t *reads[] = { bam_init1(), bam_init1() };
  int len;

  len = next_fragment(reads, samfile, 2);
  assert_equals(1, len, "Num reads");
  assert_str_equals("seq.1", bam1_qname(reads[0]), "qname");

  len = next_fragment(reads, samfile, 2);
  assert_equals(1, len, "Num reads");
  assert_str_equals("seq.2", bam1_qname(reads[0]), "qname");

  len = next_fragment(reads, samfile, 2);
  assert_equals(0, len, "Num reads");

  samclose(samfile);

  /* Release the two record buffers allocated above; they used to be leaked. */
  bam_destroy1(reads[0]);
  bam_destroy1(reads[1]);
}
/*
 * Fills table T with the reads of the indexed BAM file reads_fn that fall in
 * the intervals of `is` and lie on the same strand as their interval.
 *
 * T is created with one entry per target sequence and receives a copy of
 * every target name.  Intervals whose sequence name is not found in the BAM
 * header are skipped.  failf() is called if the file or its index cannot be
 * opened.
 */
void hash_reads( table* T, const char* reads_fn, interval_stack* is )
{
    samfile_t* reads_f = samopen( reads_fn, "rb", NULL );
    if( reads_f == NULL ) {
        failf( "Can't open bam file '%s'.", reads_fn );
    }

    bam_index_t* reads_index = bam_index_load( reads_fn );
    if( reads_index == NULL ) {
        failf( "Can't open bam index '%s.bai'.", reads_fn );
    }

    bam_init_header_hash( reads_f->header );

    /* Set up the table and copy the target names into it. */
    table_create( T, reads_f->header->n_targets );
    T->seq_names = (char**)malloc( sizeof(char*) * reads_f->header->n_targets );
    for( size_t seq_idx = 0; seq_idx < reads_f->header->n_targets; seq_idx++ ) {
        T->seq_names[seq_idx] = strdup( reads_f->header->target_name[seq_idx] );
    }

    log_puts( LOG_MSG, "hashing reads ... \n" );
    log_indent();

    bam1_t* read = bam_init1();

    for( interval_stack::iterator ival = is->begin(); ival != is->end(); ++ival ) {
        const int tid = bam_get_tid( reads_f->header, ival->seqname );
        if( tid < 0 ) continue;

        bam_iter_t read_iter = bam_iter_query( reads_index, tid, ival->start, ival->end );
        while( bam_iter_read( reads_f->x.bam, read_iter, read ) >= 0 ) {
            /* Reads on the other strand are not counted. */
            if( bam1_strand(read) != ival->strand ) continue;
            table_inc( T, read );
        }
        bam_iter_destroy( read_iter );
    }

    bam_destroy1( read );

    log_unindent();
    log_printf( LOG_MSG, "done. (%zu unique reads hashed)\n", T->m );

    bam_index_destroy( reads_index );
    samclose( reads_f );
}
int main(int argc, char *argv[]) { char *progname; char *bamfilename; int32_t tid; samfile_t *bamin; bam_index_t *bamidx; bam_plbuf_t *buf; bam1_t *bam_read; uint32_t next_pos = 1; progname = *argv; argv++; argc--; if (argc < 2) { printf("Usage: %s bam_file tid\n", progname); exit(1); } else { bamfilename = argv[0]; tid = strtol(argv[1], NULL, 10); } /* try to open bam file */ bamin = samopen(bamfilename, "rb", NULL); if (!bamin) { fprintf(stderr, "Error opening bamfile %s\n", bamfilename); exit(1); } /* try to open index */ bamidx = bam_index_load(bamfilename); if (!bamidx) { fprintf(stderr, "Error opening index for %s\n", bamfilename); exit(1); } bam_read = bam_init1(); buf = bam_plbuf_init(&pileup_func, &next_pos); /* disable maximum pileup depth */ bam_plp_set_maxcnt(buf->iter, INT_MAX); bam_fetch(bamin->x.bam, bamidx, tid, 0, INT_MAX, buf, &fetch_func); bam_plbuf_push(0, buf); /* finish pileup */ bam_plbuf_destroy(buf); bam_destroy1(bam_read); bam_index_destroy(bamidx); samclose(bamin); return 0; }
/*
 * Usage: <prog> in.bam out.bam  < ids
 *
 * Copies in.bam to out.bam, leaving out the records whose zero-based ordinal
 * appears on standard input.  The ids are consumed one at a time, so they
 * must be given in increasing order.
 *
 * Returns 0 on success and -1 on a usage error, an unopenable file, or when
 * no id can be read at all.
 */
int main(int argc, char** argv)
{
    if (argc < 3) {
        printf("No input nor output files provided");
        return -1;
    }

    bamFile in = bam_open(argv[1], "r");
    bam_header_t* header;
    if (in == NULL) {
        printf("opening input file failed");
        return -1;
    }

    bam1_t* b = bam_init1();

    bamFile out = bam_open(argv[2], "w");
    if (out == NULL) {
        /* This branch is about the output file; the message used to say "input". */
        printf("opening output file failed");
        return -1;
    }

    header = bam_header_read(in);
    if (bam_header_write(out, header) < 0) {
        printf("writing header failed");
    }

    long nextPrunedId;
    /* scanf() returns EOF (non-zero) on empty input, so compare against 1;
     * otherwise nextPrunedId would be used uninitialised. */
    if (scanf("%ld", &nextPrunedId) != 1) {
        printf("warning: no ids provided");
        return -1;
    }

    long id = 0;
    while (bam_read1(in, b) >= 0) {
        // write BAM back
        if (nextPrunedId != id++) {
            bam_write1(out, b);
        }
        else {
            // this record is pruned; fetch the id of the next one to drop
            int got = scanf("%ld", &nextPrunedId);
            if (got == 0) {
                // malformed id: stop copying, as before
                break;
            }
            if (got == EOF) {
                // no more ids: copy every remaining record
                nextPrunedId = -1;
            }
        }
    }

    // closing all resources
    bam_header_destroy(header);
    bam_close(in);
    bam_close(out);
    bam_destroy1(b);
    return 0;
}
/*
 * Usage: showbam <in.bam>
 *
 * Reads every record of the BAM file named by the first argument and passes
 * it to fetch_func().  Returns 1 on a usage error or if the file cannot be
 * opened, and 0 otherwise.
 */
int main(int argc, char *argv[])
{
	samfile_t *fp;

	/* argv[1] is only valid when an argument was actually given. */
	if (argc < 2) {
		fprintf(stderr, "Usage: showbam <in.bam>\n");
		return 1;
	}
	if ((fp = samopen(argv[1], "rb", 0)) == 0) {
		fprintf(stderr, "showbam: Fail to open BAM file %s\n", argv[1]);
		return 1;
	}

	bam1_t *b = bam_init1();
	while (samread(fp, b) >= 0) fetch_func(b);
	bam_destroy1(b);

	samclose(fp);
	return 0;
}