Beispiel #1
1
Datei: sam.c Projekt: atks/vt
/*
 * Copy every alignment record from infname to outfname (opened with
 * outmode), calling fail() for each record whose CIGAR pointer is not a
 * multiple of four.  informat is only used to label that message.  When
 * outref is non-NULL it is installed as the output's CRAM_OPT_REFERENCE
 * before the header is written.
 *
 * Every problem is reported through fail(); all resources are released on
 * every path.
 */
static void copy_check_alignment(const char *infname, const char *informat,
    const char *outfname, const char *outmode, const char *outref)
{
    samFile *in = sam_open(infname, "r");
    samFile *out = sam_open(outfname, outmode);
    bam1_t *rec = bam_init1();
    bam_hdr_t *hdr = NULL;
    int rc;

    if (in == NULL) {
        fail("couldn't open %s", infname);
        goto cleanup;
    }
    if (out == NULL) {
        fail("couldn't open %s with mode %s", outfname, outmode);
        goto cleanup;
    }
    if (rec == NULL) {
        fail("bam_init1() failed");
        goto cleanup;
    }

    if (outref != NULL && hts_set_opt(out, CRAM_OPT_REFERENCE, outref) < 0) {
        fail("setting reference %s for %s", outref, outfname);
        goto cleanup;
    }

    hdr = sam_hdr_read(in);
    if (hdr == NULL) {
        fail("reading header from %s", infname);
        goto cleanup;
    }
    /* A header write failure is reported but does not stop the copy. */
    if (sam_hdr_write(out, hdr) < 0)
        fail("writing headers to %s", outfname);

    for (;;) {
        rc = sam_read1(in, hdr, rec);
        if (rc < 0)
            break;

        int misalign = ((intptr_t) bam_get_cigar(rec)) % 4;
        if (misalign != 0)
            fail("%s CIGAR not 4-byte aligned; offset is 4k+%d for \"%s\"",
                 informat, misalign, bam_get_qname(rec));

        if (sam_write1(out, hdr, rec) < 0)
            fail("writing to %s", outfname);
    }
    /* -1 is the normal end of input; anything lower is a read error. */
    if (rc < -1)
        fail("failed to read alignment from %s", infname);

 cleanup:
    bam_destroy1(rec);
    bam_hdr_destroy(hdr);
    if (in != NULL) sam_close(in);
    if (out != NULL) sam_close(out);
}
Beispiel #2
0
// currently, this function ONLY works if each read has one hit
/*
 * Copy the BAM stream `in` to `out` (header first), filling in mate
 * information for two consecutive records that carry the same query name.
 *
 * Two record buffers are used alternately: b[curr] receives the record being
 * read and b[1-curr] holds the previous kept record ("pre"), which has not
 * been written yet while has_prev is set.
 *
 * Records with tid < 0 and records flagged BAM_FSECONDARY are dropped: the
 * loop continues before they are written and their buffer is reused.
 */
void bam_mating_core(bamFile in, bamFile out)
{
	bam_header_t *header;
	bam1_t *b[2];
	int curr, has_prev, pre_end = 0, cur_end;
	kstring_t str;

	/* scratch string handed to bam_template_cigar(); freed at the end */
	str.l = str.m = 0; str.s = 0;
	header = bam_header_read(in);
	bam_header_write(out, header);

	b[0] = bam_init1();
	b[1] = bam_init1();
	curr = 0; has_prev = 0;
	while (bam_read1(in, b[curr]) >= 0) {
		bam1_t *cur = b[curr], *pre = b[1-curr];
		if (cur->core.tid < 0) continue;
		cur_end = bam_calend(&cur->core, bam1_cigar(cur));
		/* an alignment running past the end of its target is marked unmapped */
		if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
		if (cur->core.flag & BAM_FSECONDARY) continue; // skip secondary alignments
		if (has_prev) {
			if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name
				/* cross-link the two records' mate reference and position */
				cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos;
				pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos;
				if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
					&& !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // set TLEN/ISIZE
				{
					/* reverse-strand records contribute their end, others their start */
					uint32_t cur5, pre5;
					cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
					pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
					cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
				} else cur->core.isize = pre->core.isize = 0;
				/* mirror each record's strand into the other's mate-strand flag */
				if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE;
				else cur->core.flag &= ~BAM_FMREVERSE;
				if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE;
				else pre->core.flag &= ~BAM_FMREVERSE;
				/* an unmapped record marks its mate as "mate unmapped" and not properly paired */
				if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; }
				if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; }
				bam_template_cigar(pre, cur, &str);
				bam_write1(out, pre);
				bam_write1(out, cur);
				/* both records are written, so nothing is pending */
				has_prev = 0;
			} else { // unpaired or singleton
				/* pre has no partner: clear its mate fields and write it; cur becomes the pending record */
				pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
				if (pre->core.flag & BAM_FPAIRED) {
					pre->core.flag |= BAM_FMUNMAP;
					pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR;
				}
				bam_write1(out, pre);
			}
		} else has_prev = 1;
		/* swap buffers so the record just processed becomes "pre" next time */
		curr = 1 - curr;
		pre_end = cur_end;
	}
	/* flush the last pending record; it is written without touching its mate fields */
	if (has_prev) bam_write1(out, b[1-curr]);
	bam_header_destroy(header);
	bam_destroy1(b[0]);
	bam_destroy1(b[1]);
	free(str.s);
}
/**
 * Construct a pair iterator from an already-open file handle and its header.
 *
 * Stores both shared pointers, allocates two record buffers with bam_init1()
 * (wrapped by utils::make_shared_sam) and then primes m_sam_records with the
 * result of fetch_next_pair().
 *
 * NOTE(review): C++ initializes members in class-declaration order, not in
 * the order written here; the class declaration is not visible in this
 * excerpt, so confirm that m_sam_records is declared after the members
 * fetch_next_pair() depends on.
 */
SamPairIterator::SamPairIterator(const std::shared_ptr<htsFile>& sam_file_ptr, const std::shared_ptr<bam_hdr_t>& sam_header_ptr) :
  m_sam_file_ptr    {sam_file_ptr},
  m_sam_header_ptr  {sam_header_ptr},
  m_sam_record_ptr1 {utils::make_shared_sam(bam_init1())}, ///< important to initialize the record buffer in the constructor so we can reuse it across the iterator
  m_sam_record_ptr2 {utils::make_shared_sam(bam_init1())}, ///< important to initialize the record buffer in the constructor so we can reuse it across the iterator
  m_sam_records     {fetch_next_pair()} ///< important queue must be initialized *before* we call fetch_next_pair. Order matters
{}
Beispiel #4
0
/*
 * Debug-time sanity check of the pair files written for OUTPUT_PREFIX.
 *
 * - WRITE_SPLITREAD: every pair in "<prefix>_splitread.bam" must satisfy
 *   b2g_bam_pair_split().
 * - WRITE_LOWQ: every pair in "<prefix>_lowqual.bam" must fail
 *   b2g_bams_highq(); if split reads were written to their own file the pair
 *   must also fail b2g_bam_pair_split(), otherwise a high-quality pair is
 *   tolerated only when it is a split pair.
 *
 * Records are consumed two at a time (a read followed by its mate).  All
 * checks are assert()s, so they disappear when NDEBUG is defined.
 *
 * A file that cannot be opened trips an assert and is then skipped instead
 * of being dereferenced; a trailing read without a mate ends the scan of
 * that file instead of being compared against a stale mate record.
 */
static void _check_quality(char *OUTPUT_PREFIX, int WRITE_LOWQ, int WRITE_SPLITREAD, int MAPPING_QUALITY, int MIN_ALIGNED_PCT, int IGNORE_DUPLICATES) {
  bam1_t *b1 = bam_init1(), *b2 = bam_init1();
  if (WRITE_SPLITREAD) {
    samfile_t *split_file = b2g_samfile_open("%s_splitread.bam", "rb", 0, OUTPUT_PREFIX);
    assert(split_file);
    if (split_file) {
      while (-1 < samread(split_file, b1)) {
        if (samread(split_file, b2) < 0) break; /* no mate left */
        assert(b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
      }
      samclose(split_file);
    }
  }
  if (WRITE_LOWQ) {
    samfile_t *lowq_file = b2g_samfile_open("%s_lowqual.bam", "rb", 0, OUTPUT_PREFIX);
    assert(lowq_file);
    if (lowq_file) {
      while (-1 < samread(lowq_file, b1)) {
        if (samread(lowq_file, b2) < 0) break; /* no mate left */
        if (WRITE_SPLITREAD) {
          assert(!b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
          assert(!b2g_bams_highq(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
        }
        else assert(!b2g_bams_highq(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES) || b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
      }
      samclose(lowq_file);
    }
  }

  bam_destroy1(b1);
  bam_destroy1(b2);
}
Beispiel #5
0
/**
 * Sum the "FM" tag over each input BAM and print "<file>: <sum>" per file.
 *
 * Options: -o FILE writes the report to FILE instead of stdout; -h / -?
 * (or a leading --help) print usage.  Only records that are not secondary,
 * not supplementary and not read 2 contribute to the sum.
 *
 * Input files are the arguments left after option parsing, i.e. from
 * optind onwards, so "-o" and its argument are never opened as BAM input.
 *
 * @return EXIT_SUCCESS on completion, EXIT_FAILURE if the output file cannot
 *         be opened, or whatever sum_usage() returns for usage requests.
 */
int famstats_sum_main(int argc, char *argv[]) {
    if(argc < 2)
        return sum_usage(argv);
    if(strcmp(argv[1], "--help") == 0) {
        return sum_usage(argv, EXIT_SUCCESS);
    }
    int c;
    FILE *ofp(stdout);
    while((c = getopt(argc, argv, "o:h?")) > -1) {
        switch(c) {
        case 'o':
            // A repeated -o must not leak the previously opened file.
            if(ofp != stdout) fclose(ofp);
            ofp = fopen(optarg, "w");
            if(!ofp) {
                fprintf(stderr, "Could not open output file '%s' for writing.\n", optarg);
                return EXIT_FAILURE;
            }
            break;
        case 'h': case '?':
            if(ofp != stdout) fclose(ofp);
            return sum_usage(argv, EXIT_SUCCESS);
        }
    }
    fputs("Filename: count\n", ofp);
    // Start at optind: everything before it was consumed by getopt.
    for(int i(optind); i < argc; ++i) {
        dlib::check_bam_tag_exit(argv[i], "FM");
        dlib::BamHandle in(argv[i]);
        bam1_t *b(bam_init1());
        size_t count(0);
        while(sam_read1(in.fp, in.header, b) >= 0)
            if((b->core.flag & (BAM_FSECONDARY | BAM_FSUPPLEMENTARY | BAM_FREAD2)) == 0)
                count += bam_itag(b,"FM");
        fprintf(ofp, "%s: %lu\n", argv[i], static_cast<unsigned long>(count));
        bam_destroy1(b);
    }
    // Leave stdout open for the caller; only close a file we opened.
    if(ofp != stdout) fclose(ofp);
    return EXIT_SUCCESS;
}
Beispiel #6
0
// Build a new in-memory BAM record from textual fields.
//   qname    - read name (NUL-terminated)
//   gseq_tid - target (reference) id; a negative value means unmapped
//   pos      - 1-based position; pos <= 0 means unmapped
//   reverse  - when true the BAM_FREVERSE flag is set
//   qseq     - read sequence, handed to add_sequence()
//   cigar    - CIGAR text, handed to set_cigar()
//   quals    - base qualities, handed to add_quals()
// The record is marked novel and has no header attached (bam_header=NULL).
GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
                 int pos, bool reverse, const char* qseq,
                 const char* cigar, const char* quals):iflags(0), exons(1),
                		 clipL(0), clipR(0), mapped_len(0) {
   novel=true;
   bam_header=NULL;
   b=bam_init1();
   // Either an invalid position or an invalid target makes the record unmapped.
   if (pos<=0 || gseq_tid<0) {
               b->core.pos=-1; //unmapped
               b->core.flag |= BAM_FUNMAP;
               gseq_tid=-1;
               }
          else b->core.pos=pos-1; //BAM is 0-based
   b->core.tid=gseq_tid;
   b->core.qual=255;
   // no mate information
   b->core.mtid=-1;
   b->core.mpos=-1;
   int l_qseq=strlen(qseq);
   //this may not be accurate, setting CIGAR is the correct way
   //b->core.bin = bam_reg2bin(b->core.pos, b->core.pos+l_qseq-1);
   b->core.l_qname=strlen(qname)+1; //includes the \0 at the end
   // NOTE(review): realloc_bdata() presumably grows the record's data buffer
   // and returns where the new bytes go - confirm against its definition.
   memcpy(realloc_bdata(b, b->core.l_qname), qname, b->core.l_qname);
   set_cigar(cigar); //this will also set core.bin
   add_sequence(qseq, l_qseq);
   add_quals(quals); //quals must be given as Phred33
   if (reverse) { b->core.flag |= BAM_FREVERSE ; }
   }
Beispiel #7
0
// remember to clean up with bam_destroy1(b);
//
// Convert an Alignment to a freshly allocated bam1_t by rendering it as a SAM
// line (alignment_to_sam), prefixing the given SAM header, and parsing the
// resulting "data:" pseudo-file.
//
// sam_header must be non-empty.  The returned record is owned by the caller.
// Any failure (the SAM text cannot be opened, its header cannot be read, or
// the record cannot be parsed) prints a message to cerr and terminates the
// process with exit(1).
bam1_t* alignment_to_bam(const string& sam_header,
                         const Alignment& alignment,
                         const string& refseq,
                         const int32_t refpos,
                         const string& cigar,
                         const string& mateseq,
                         const int32_t matepos,
                         const int32_t tlen) {

    assert(!sam_header.empty());
    string sam_file = "data:" + sam_header + alignment_to_sam(alignment, refseq, refpos, cigar, mateseq, matepos, tlen);
    const char* sam = sam_file.c_str();
    samFile *in = sam_open(sam, "r");
    if (!in) {
        cerr << "[vg::alignment] Failure to open SAM data" << endl
             << sam << endl;
        exit(1);
    }
    bam_hdr_t *header = sam_hdr_read(in);
    if (!header) {
        cerr << "[vg::alignment] Failure to parse SAM header" << endl
             << sam << endl;
        exit(1);
    }
    bam1_t *aln = bam_init1();
    if (sam_read1(in, header, aln) >= 0) {
        bam_hdr_destroy(header);
        sam_close(in); // clean up
        return aln;
    } else {
        cerr << "[vg::alignment] Failure to parse SAM record" << endl
             << sam << endl;
        exit(1);
    }
}
// Open every BAM in bam_fnames and queue the first record of each.
//
// file_offsets is honoured only when it has exactly one entry per file; a
// positive entry is used as a bgzf_seek() target before the first read.
// Files that yield a record are kept in _src_files; a file that yields none
// is closed again immediately.
//
// Exits the process if a file cannot be opened or if no file produced a
// record.  An empty file list leaves the object empty.
BamMerge::BamMerge(const vector<string>& bam_fnames,
		   vector<int64_t> file_offsets) :
  _bam_fnames(bam_fnames),
  _lines(less_bam(true)),
  _last_id(0)
{
  if (bam_fnames.empty())
    return;
  
  for (size_t i = 0; i < _bam_fnames.size(); ++i) {
    const char* fname = _bam_fnames[i].c_str();
    samfile_t* fp = samopen(fname, "rb", 0);
    if (fp==0) {
      warn_msg(ERR_BAM_OPEN, fname);
      exit(1);
    }

    if (bam_fnames.size() == file_offsets.size() &&
	file_offsets[i] > 0)
      bgzf_seek(fp->x.bam, file_offsets[i], SEEK_SET);

    bam1_t* b = bam_init1();
    if (samread(fp, b) > 0) {
      _src_files.push_back(fp);
      // _lines.size() is the index this file just received in _src_files.
      CBamLine brec(_lines.size(), b, fp->header);
      _lines.push(brec);
    }
    else {
      // Nothing to merge from this file: release the record and the handle,
      // which would otherwise never be closed.
      bam_destroy1(b);
      samclose(fp);
    }
  }

  if (_lines.size() == 0) {
    warn_msg("Warning: no input BAM records found.\n");
    exit(1);
  }
}
Beispiel #9
0
// Cross-check two BED overlap implementations on "bed_test.bam":
// dlib::ParsedBed::bam1_test() versus bed_read()/bed_overlap(), both built
// from "bed_test.bed".  Every mapped record is tested with both.
int main(int argc, char **argv) {
    dlib::BamHandle in = dlib::BamHandle("bed_test.bam");
    dlib::ParsedBed bed = dlib::ParsedBed("bed_test.bed", in.header);
    bam1_t *b = bam_init1();
    size_t diffs = 0;
    void *lh3bed = bed_read("bed_test.bed");
    // Output file opened for records on which the two implementations disagree.
    samFile *so = sam_open("disagreed.bam", "wb9");
    sam_hdr_write(so, in.header);
    size_t disagrees = 0, agrees = 0;
    int dbr = 0, lh3r = 0;
    while(in.read(b) != -1) {
        // Unmapped records are skipped.
        if(b->core.flag & (BAM_FUNMAP)) continue;
        if((dbr = bed.bam1_test(b)) != (lh3r = bed_overlap(lh3bed, in.header->target_name[b->core.tid], b->core.pos, bam_endpos(b)))) {
            // NOTE(review): if LOG_EXIT terminates the process (as its name
            // suggests), the two statements below never run and
            // "disagreed.bam" never receives a record - confirm.
            LOG_EXIT("dbr: %i. lh3r: %i. Contig: %s. Position: %i. endpos; %i\n", dbr, lh3r, in.header->target_name[b->core.tid], b->core.pos, bam_endpos(b));
            if(++disagrees % 100 == 0) LOG_DEBUG("disagrees: %lu.\n", disagrees);
            sam_write1(so, in.header, b);
        } else {
            if(++agrees % 500000 == 0) LOG_DEBUG("agrees: %lu.\n", agrees);
        }
    }
    sam_close(so);
    bam_destroy1(b);
    bed_destroy(lh3bed);
    return EXIT_SUCCESS;
}
Beispiel #10
0
/*
 * Convert the alignments of state->fp into FASTQ text.
 *
 * Secondary and supplementary alignments are skipped, as are records that
 * do not carry every bit of state->flag_on or that carry any bit of
 * state->flag_off.  Each remaining record is formatted by bam1_to_fq() and
 * written to the stream selected by which_readpart().
 *
 * Returns true on success, after printing the number of converted reads to
 * stderr.  Returns false if the record buffer cannot be allocated or if
 * bam1_to_fq() fails; the record and the line buffer are released on every
 * path.
 */
static bool bam2fq_mainloop(bam2fq_state_t *state)
{
    // process a name collated BAM into fastq
    bam1_t* b = bam_init1();
    if (b == NULL) {
        perror(NULL);
        return false;
    }
    int64_t n_reads = 0; // Statistics
    kstring_t linebuf = { 0, 0, NULL }; // Buffer
    bool ok = true;
    while (sam_read1(state->fp, state->h, b) >= 0) {
        if (b->core.flag&(BAM_FSECONDARY|BAM_FSUPPLEMENTARY) // skip secondary and supplementary alignments
            || (b->core.flag&(state->flag_on)) != state->flag_on             // or reads indicated by filter flags
            || (b->core.flag&(state->flag_off)) != 0) continue;
        ++n_reads;

        if (!bam1_to_fq(b, &linebuf, state)) {
            // Leave through the common cleanup instead of returning here,
            // which used to leak both b and linebuf.s.
            ok = false;
            break;
        }
        fputs(linebuf.s, state->fpr[which_readpart(b)]);
    }
    free(linebuf.s);
    bam_destroy1(b);
    if (!ok) return false;

    fprintf(stderr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
    return true;
}
Beispiel #11
0
/**
 * Open an indexed BAM file for reading, optionally restricted to intervals.
 *
 * The file name must end in ".bam" (case-insensitive).  The constructor
 * opens the file, reads its header, allocates the record buffer and loads
 * the index.
 *
 * Failure handling: a name without the ".bam" suffix, a file that cannot be
 * opened, or an unreadable header prints a message and calls exit(1); a
 * missing index prints a message and calls abort().
 *
 * Random access is enabled only when at least one interval was supplied.
 */
BAMOrderedReader::BAMOrderedReader(std::string bam_file, std::vector<GenomeInterval>& intervals)
:bam_file(bam_file), intervals(intervals), sam(0), hdr(0), idx(0), itr(0)
{
    const char* fname = bam_file.c_str();
    size_t len = strlen(fname);
    // Check the length first: for names shorter than 4 characters,
    // fname+len-4 would point before the start of the string.
    if ( len < 4 || strcasecmp(".bam",fname+len-4) )
    {
        fprintf(stderr, "[%s:%d %s] Not a BAM file: %s\n", __FILE__, __LINE__, __FUNCTION__, bam_file.c_str());
        exit(1);
    }

    sam = sam_open(bam_file.c_str(), "r");
    if (sam==0)
    {
        fprintf(stderr, "[%s:%d %s] Cannot open %s\n", __FILE__, __LINE__, __FUNCTION__, bam_file.c_str());
        exit(1);
    }

    hdr = sam_hdr_read(sam);
    if (hdr==0)
    {
        fprintf(stderr, "[%s:%d %s] Cannot read header of %s\n", __FILE__, __LINE__, __FUNCTION__, bam_file.c_str());
        exit(1);
    }

    s = bam_init1();

    idx = bam_index_load(bam_file.c_str());
    if (idx==0)
    {
        fprintf(stderr, "[%s:%d %s] fail to load index for %s\n", __FILE__, __LINE__, __FUNCTION__, bam_file.c_str());
        abort();
    }
    else
    {
        index_loaded = true;
    }

    str = {0,0,0};

    intervals_present =  intervals.size()!=0;
    interval_index = 0;

    random_access_enabled = intervals_present && index_loaded;
}
Beispiel #12
0
/*
 * Count read names in the BAM file fn.
 *
 * Every record's query name (l_qname bytes) is passed to inc_hash_table().
 * Progress is printed to stderr every million records.  Exits the process
 * if the file cannot be opened.  Returns the table from create_hash_table().
 */
hash_table* hash_ids(const char* fn)
{
    fprintf(stderr, "hashing ... \n");

    hash_table* table = create_hash_table();

    samfile_t* bam_in = samopen(fn, "rb", NULL);
    if (!bam_in) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    bam1_t* rec = bam_init1();
    uint32_t seen = 0;

    for (;;) {
        if (samread(bam_in, rec) < 0)
            break;

        ++seen;
        if (seen % 1000000 == 0)
            fprintf(stderr, "\t%d reads\n", seen);

        inc_hash_table(table, bam1_qname(rec), rec->core.l_qname);
    }

    bam_destroy1(rec);
    samclose(bam_in);

    fprintf(stderr, "done.\n");
    return table;
}
Beispiel #13
0
struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, bam_hdr_t *header,  int count, struct lm *lm)
/* Read up to count further alignments from fh and return them as a list of
 * samAlignment, allocated in lm.  The list is shorter than count when the
 * input runs out first. */
{
/* Helper that accumulates the converted records. */
struct bamToSamHelper helper;
helper.lm = lm;
helper.chrom = NULL;
helper.dy = dyStringNew(0);
helper.samFile = fh;
helper.samList = NULL;

/* Convert records one at a time until count is reached or reading fails. */
bam1_t *rec = bam_init1();
int done = 0;
while (done < count)
    {
    if (sam_read1(fh, header, rec) < 0)
        break;
    bamAddOneSamAlignment(rec, &helper, header);
    ++done;
    }
bam_destroy1(rec);

/* Release the scratch string and put the list back in read order. */
dyStringFree(&helper.dy);
slReverse(&helper.samList);
return helper.samList;
}
Beispiel #14
0
/*
 * Write the output header, then copy every record from state->input_file
 * to state->output_file after passing it through state->mode_func.
 *
 * Returns true when the whole input was processed, i.e. sam_read1() ended
 * with -1.  Returns false, after reporting through print_error_errno(), if
 * the header or a record cannot be written, the record buffer cannot be
 * allocated, or reading stops with any status other than -1.
 */
static bool readgroupise(state_t* state)
{
    if (sam_hdr_write(state->output_file, state->output_header) != 0) {
        print_error_errno("addreplacerg", "[%s] Could not write header to output file", __func__);
        return false;
    }

    bam1_t* file_read = bam_init1();
    if (file_read == NULL) {
        /* Report the failed allocation instead of handing NULL to sam_read1(). */
        print_error_errno("addreplacerg", "[%s] Could not allocate memory for alignment record", __func__);
        return false;
    }

    int ret;
    while ((ret = sam_read1(state->input_file, state->input_header, file_read)) >= 0) {
        state->mode_func(state, file_read);

        if (sam_write1(state->output_file, state->output_header, file_read) < 0) {
            print_error_errno("addreplacerg", "[%s] Could not write read to output file", __func__);
            bam_destroy1(file_read);
            return false;
        }
    }
    bam_destroy1(file_read);
    if (ret != -1) {
        print_error_errno("addreplacerg", "[%s] Error reading from input file", __func__);
        return false;
    } else {
        return true;
    }
}
Beispiel #15
0
/*
 * Parse one in-memory SAM record carrying an auxiliary field of each kind
 * (A, i, f, d, Z, H, B and a large i), check the decoded value of each
 * field, then check that sam_format1() reproduces the expected text.
 *
 * Every mismatch is reported through fail(); the function always returns 1.
 */
static int aux_fields1(void)
{
    static const char sam[] = "data:"
"@SQ\tSN:one\tLN:1000\n"
"@SQ\tSN:two\tLN:500\n"
"r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:" xstr(PI) "\tXd:d:" xstr(E) "\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,+2\tZZ:i:1000000\n";

    // Canonical form of the alignment record above, as output by sam_format1()
    static const char r1[] = "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:3.14159\tXd:d:2.71828\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,2\tZZ:i:1000000";

    samFile *in = sam_open(sam, "r");
    bam_hdr_t *header = sam_hdr_read(in);
    bam1_t *aln = bam_init1();
    uint8_t *p;
    uint32_t n;
    kstring_t ks = { 0, 0, NULL };

    if (sam_read1(in, header, aln) >= 0) {
        if ((p = check_bam_aux_get(aln, "XA", 'A')) && bam_aux2A(p) != 'k')
            fail("XA field is '%c', expected 'k'", bam_aux2A(p));

        if ((p = check_bam_aux_get(aln, "Xi", 'C')) && bam_aux2i(p) != 37)
            fail("Xi field is %d, expected 37", bam_aux2i(p));

        if ((p = check_bam_aux_get(aln, "Xf", 'f')) && fabs(bam_aux2f(p) - PI) > 1E-6)
            fail("Xf field is %.12f, expected pi", bam_aux2f(p));

        // The message names Xd, the field actually being checked here.
        if ((p = check_bam_aux_get(aln, "Xd", 'd')) && fabs(bam_aux2f(p) - E) > 1E-6)
            fail("Xd field is %.12f, expected e", bam_aux2f(p));

        if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), HELLO) != 0)
            fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), HELLO);

        if ((p = check_bam_aux_get(aln, "XH", 'H')) && strcmp(bam_aux2Z(p), BEEF) != 0)
            fail("XH field is \"%s\", expected \"%s\"", bam_aux2Z(p), BEEF);

        // TODO Invent and use bam_aux2B()
        // Checks the raw bytes: type 'B', subtype 'c', a 4-byte count of 3,
        // then the three values -2, 0, 2 as single bytes.
        if ((p = check_bam_aux_get(aln, "XB", 'B')) && ! (memcmp(p, "Bc", 2) == 0 && (memcpy(&n, p+2, 4), n) == 3 && memcmp(p+6, "\xfe\x00\x02", 3) == 0))
            fail("XB field is %c,..., expected c,-2,0,+2", p[1]);

        if ((p = check_bam_aux_get(aln, "ZZ", 'I')) && bam_aux2i(p) != 1000000)
            fail("ZZ field is %d, expected 1000000", bam_aux2i(p));

        if (sam_format1(header, aln, &ks) < 0)
            fail("can't format record");

        if (strcmp(ks.s, r1) != 0)
            fail("record formatted incorrectly: \"%s\"", ks.s);

        free(ks.s);
    }
    else fail("can't read record");

    bam_destroy1(aln);
    bam_hdr_destroy(header);
    sam_close(in);

    return 1;
}
Beispiel #16
0
/*
 * Compact buf, then append records read from `in` until a stopping
 * condition is met.  Returns the number of elements in buf afterwards.
 *
 * buf->x is set to the index of the first appended record whose tid differs
 * from last_tid, or to buf->n if reading stopped early without such a
 * change; it stays -1 if input ran out before either happened.
 */
static int fill_buf(samfile_t *in, buffer_t *buf)
{
	int i, ret, last_tid, min_rpos = 0x7fffffff, capacity;
	bam1_t *b = bam_init1();
	bam1_core_t *c = &b->core;
	// squeeze out the empty cells at the beginning
	for (i = 0; i < buf->n; ++i)
		if (buf->buf[i].b) break;
	if (i < buf->n) { // squeeze
		if (i > 0) {
			memmove(buf->buf, buf->buf + i, sizeof(elem_t) * (buf->n - i));
			buf->n = buf->n - i;
		}
	} else buf->n = 0; /* every cell was empty */
	// calculate min_rpos
	/* smallest non-negative rpos among the elements still buffered */
	for (i = 0; i < buf->n; ++i) {
		elem_t *e = buf->buf + i;
		if (e->b && e->rpos >= 0 && e->rpos < min_rpos)
			min_rpos = buf->buf[i].rpos;
	}
	// fill the buffer
	buf->x = -1;
	last_tid = buf->n? buf->buf[0].b->core.tid : -1;
	capacity = buf->n + BLOCK_SIZE;
	while ((ret = samread(in, b)) >= 0) {
		elem_t *e;
		uint8_t *qual = bam1_qual(b);
		int is_mapped;
		if (last_tid < 0) last_tid = c->tid;
		/* remember where the first record on a different target starts */
		if (c->tid != last_tid) {
			if (buf->x < 0) buf->x = buf->n;
		}
		if (buf->n >= buf->max) { // enlarge
			/* double the array, starting at 8 elements */
			buf->max = buf->max? buf->max<<1 : 8;
			buf->buf = (elem_t*)realloc(buf->buf, sizeof(elem_t) * buf->max);
		}
		e = &buf->buf[buf->n++];
		/* store a copy; b itself is reused for the next read */
		e->b = bam_dup1(b);
		e->rpos = -1; e->score = 0;
		/* score = sum of (quality + 1), divided by sqrt(read length + 1) */
		for (i = 0; i < c->l_qseq; ++i) e->score += qual[i] + 1;
		e->score = (double)e->score / sqrt(c->l_qseq + 1);
		is_mapped = (c->tid < 0 || c->tid >= in->header->n_targets || (c->flag&BAM_FUNMAP))? 0 : 1;
		if (!is_mapped) e->score = -1;
		if (is_mapped && (c->flag & BAM_FREVERSE)) {
			/* NOTE(review): if bam_calend() already returns an absolute end
			 * coordinate, adding core.pos counts the start twice - confirm. */
			e->rpos = b->core.pos + bam_calend(&b->core, bam1_cigar(b));
			if (min_rpos > e->rpos) min_rpos = e->rpos;
		}
		if (buf->n >= capacity) {
			/* keep reading while mapped records start at or before min_rpos */
			if (is_mapped && c->pos <= min_rpos) capacity += BLOCK_SIZE;
			else break;
		}
	}
	/* ret >= 0 means the loop ended via break rather than by running out of input */
	if (ret >= 0 && buf->x < 0) buf->x = buf->n;
	bam_destroy1(b);
	return buf->n;
}
Beispiel #17
0
// Default constructor: no input or output is attached yet.
// NOTE(review): "-" is presumably the conventional name for standard output
// and "w" the output mode string - confirm where m_out_path/m_out_mode are used.
SamCtrl::SamCtrl()
{
	m_out_path = "-";
	m_out_mode = "w";
	m_fn_list = 0;
	m_in = 0;
	m_out = 0;
	// Allocate the alignment record here; nothing in this constructor frees it.
	m_b = bam_init1(); // initialize m_b

}
Beispiel #18
0
/*
 * Call func(record, data) for every alignment that the index idx yields
 * for the region tid:beg-end of fp.
 *
 * Returns 0 when iteration ended with status -1, otherwise the negative
 * status at which sam_itr_next() stopped.
 */
int samfetch(samfile_t *fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
{
    bam1_t *rec = bam_init1();
    hts_itr_t *it = sam_itr_queryi(idx, tid, beg, end);
    int status;

    for (;;) {
        status = sam_itr_next(fp->file, it, rec);
        if (status < 0)
            break;
        func(rec, data);
    }

    hts_itr_destroy(it);
    bam_destroy1(rec);

    if (status == -1)
        return 0;
    return status;
}
Beispiel #19
0
/*
 * Return the number of records that can be read from the BAM file at path,
 * or 0 if the file cannot be opened.
 */
static int _count_reads(char *path) {
  samfile_t *bamfile = b2g_samfile_open(path, "rb", 0);
  if (bamfile == NULL) {
    return 0;
  }

  bam1_t *rec = bam_init1();
  int n_reads = 0;
  while (samread(bamfile, rec) >= 0) {
    ++n_reads;
  }

  bam_destroy1(rec);
  samclose(bamfile);
  return n_reads;
}
Beispiel #20
0
// Read the next record from the open file into a newly allocated bam1_t and
// return it.
//
// Throws SAM_IO_Error(file_not_opened) if no file has been opened.
// When samread() returns -1, end_of_file is set; a record is still
// allocated and returned in that case.
//
// NOTE(review): nothing here frees the returned record, so ownership
// presumably passes to the caller - confirm.  Return values of samread()
// below -1 are not distinguished from a successful read.
bam1_t * SAM_istream::read() throw (SAM_IO_Error) {
	if (sam_file == NULL) {
		throw SAM_IO_Error(SAM_IO_Error::file_not_opened, "tried to read from a not previously opened file");
	}
	bam1_t * b = bam_init1();
	int bytes = samread(sam_file,b);
	if (bytes == -1)
		end_of_file = true;
	return b;
}
Beispiel #21
0
/*
 * Run func(record, in, out, data) over the alignments of the BAM file ifn.
 *
 *   ifn  - input BAM path
 *   ofn  - optional output BAM path; when non-NULL it is opened for writing
 *          with the input header and handed to func as `out`
 *   reg  - optional region string; when non-NULL only that region is
 *          visited, which requires an index for ifn
 *   data - opaque pointer forwarded to func
 *
 * Exits the process if the input or output cannot be opened, the index is
 * missing, the region names an unknown reference, or reading stops with a
 * status other than -1.  On a normal return the value is therefore -1, the
 * end-of-input status of the read call.
 */
int sam_fetch(char *ifn, char *ofn, char *reg, void *data, sam_fetch_f func) {
	int ret = 0;
	samfile_t *in = samopen(ifn, "rb", 0);
	samfile_t *out = 0;
	if (in == 0) {
		/* in->header is dereferenced below, so a failed open must stop here */
		fprintf(stderr, "[%s:%d] Failed to open \"%s\" for reading.\n",
						__func__, __LINE__, ifn);
		exit(1);
	}
	if (ofn) {
		out = samopen(ofn, "wb", in->header);
		if (out == 0) {
			fprintf(stderr, "[%s:%d] Failed to open \"%s\" for writing.\n",
							__func__, __LINE__, ofn);
			exit(1);
		}
	}

	if (reg) {
		bam_index_t *idx = bam_index_load(ifn);
		if (idx == 0) {
			fprintf(stderr, "[%s:%d] Random alignment retrieval only works for indexed BAM files.\n",
							__func__, __LINE__);
			exit(1);
		}
		int tid, beg, end;
		bam_parse_region(in->header, reg, &tid, &beg, &end);
		if (tid < 0) {
			fprintf(stderr, "[%s:%d] Region \"%s\" specifies an unknown reference name. \n",
							__func__, __LINE__, reg);
			exit(1);
		}
		bam_iter_t iter;
		bam1_t *b = bam_init1();
		iter = bam_iter_query(idx, tid, beg, end);
		while ((ret = bam_iter_read(in->x.bam, iter, b)) >= 0) func(b, in, out, data);
		bam_iter_destroy(iter);
		bam_destroy1(b);
		bam_index_destroy(idx);
	} else {
		/* no region: stream the whole file */
		bam1_t *b = bam_init1();
		while ((ret = samread(in, b)) >= 0) func(b, in, out, data);
		bam_destroy1(b);
	}
	if (out) samclose(out);
	samclose(in);
			
	if (ret != -1) {					/* truncated is -2 */
		fprintf(stderr, "[%s:%d] Alignment retrieval failed due to truncated file\n",
						__func__, __LINE__);
		exit(1);
	}

	return ret;
}
Beispiel #22
0
/*
 * Read up to n_needed sequences from the BAM stream bs->fp into a newly
 * allocated bwa_seq_t array.
 *
 *   bs->which - bit mask choosing which records to keep: bit 1 = records
 *               flagged BAM_FREAD1, bit 2 = BAM_FREAD2, bit 4 = records with
 *               neither flag
 *   n         - receives the number of sequences stored
 *   is_comp   - forwarded to seq_reverse() when building rseq
 *   trim_qual - when >= 1, each read is passed to bwa_trim_read()
 *
 * Returns the array, or 0 (after freeing it) when no sequence was stored.
 * A read status below -1 is fatal.
 */
static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual)
{
	bwa_seq_t *seqs, *p;
	int n_seqs, l, i;
	long n_trimmed = 0, n_tot = 0;
	bam1_t *b;
	int res;

	b = bam_init1();
	n_seqs = 0;
	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	while ((res = bam_read1(bs->fp, b)) >= 0) {
		uint8_t *s, *q;
		int go = 0;
		/* decide whether this record is selected by bs->which */
		if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1;
		if ((bs->which & 2) && (b->core.flag & BAM_FREAD2)) go = 1;
		if ((bs->which & 4) && !(b->core.flag& BAM_FREAD1) && !(b->core.flag& BAM_FREAD2))go = 1;
		if (go == 0) continue;
		l = b->core.l_qseq;
		p = &seqs[n_seqs++];
		p->tid = -1; // no assigned to a thread
		p->qual = 0;
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;
		s = bam1_seq(b); q = bam1_qual(b);
		p->seq = (ubyte_t*)calloc(p->len + 1, 1);
		p->qual = (ubyte_t*)calloc(p->len + 1, 1);
		for (i = 0; i != p->full_len; ++i) {
			/* map each base code through bam_nt16_nt4_table; qualities become
			 * value+33, capped at 126 */
			p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)];
			p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126;
		}
		if (bam1_strand(b)) { // then reverse 
			seq_reverse(p->len, p->seq, 1);
			seq_reverse(p->len, p->qual, 0);
		}
		if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
		/* rseq is a copy of seq taken before seq itself is reversed below */
		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
		p->name = strdup((const char*)bam1_qname(b));
		/* stop as soon as the requested number of sequences is stored */
		if (n_seqs == n_needed) break;
	}
	/* -1 is tolerated as end of input; any other negative status is fatal */
	if (res < 0 && res != -1) err_fatal_simple("Error reading bam file");
	*n = n_seqs;
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
	if (n_seqs == 0) {
		free(seqs);
		bam_destroy1(b);
		return 0;
	}
	bam_destroy1(b);
	return seqs;
}
Beispiel #23
0
/*
 * Copy the BAM stream `in` to `out`, rewriting each record's CIGAR and
 * position relative to a reference record.
 *
 * A record at position 0 whose query name equals the name of its target is
 * treated as the reference: its sequence is unpacked into r, its CIGAR is
 * replaced by a single match, and posmap is rebuilt from it.  Every other
 * record is unpacked into q and compared against r column by column.
 *
 * Always returns 0.
 *
 * NOTE(review): cigar2 is filled through write_cigar(), which is not visible
 * here; if that grows cigar2 on the heap, the buffer is never freed - confirm.
 * NOTE(review): the non-reference branch reads r.s and posmap, so it
 * presumably requires a reference record to have been seen first - confirm.
 */
int bam_pad2unpad(bamFile in, bamFile out)
{
	bam_header_t *h;
	bam1_t *b;
	kstring_t r, q;
	uint32_t *cigar2 = 0;
	int n2 = 0, m2 = 0, *posmap = 0;

	h = bam_header_read(in);
	bam_header_write(out, h);
	b = bam_init1();
	r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
	while (bam_read1(in, b) >= 0) {
		uint32_t *cigar = bam1_cigar(b);
		/* start a fresh CIGAR in cigar2 for this record */
		n2 = 0;
		if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam1_qname(b), h->target_name[b->core.tid]) == 0) {
			int i, k;
			unpad_seq(b, &r);
			write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH));
			replace_cigar(b, n2, cigar2);
			posmap = realloc(posmap, r.m * sizeof(int));
			/* posmap[i] = number of non-zero entries of r.s before column i */
			for (i = k = 0; i < r.l; ++i) {
				posmap[i] = k; // note that a read should NOT start at a padding
				if (r.s[i]) ++k;
			}
		} else {
			int i, k, op;
			unpad_seq(b, &q);
			/* keep a leading soft clip as is */
			if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[0]);
			/* classify each column by whether the read and r have a non-zero entry there */
			for (i = 0, k = b->core.pos; i < q.l; ++i, ++k)
				q.s[i] = q.s[i]? (r.s[k]? BAM_CMATCH : BAM_CINS) : (r.s[k]? BAM_CDEL : BAM_CPAD);
			/* run-length encode the per-column operations */
			for (i = k = 1, op = q.s[0]; i < q.l; ++i) {
				if (op != q.s[i]) {
					write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
					op = q.s[i]; k = 1;
				} else ++k;
			}
			write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
			/* keep a trailing soft clip as is */
			if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]);
			/* fold match-pad-match triples into the last match; the emptied slots are dropped below */
			for (i = 2; i < n2; ++i)
				if (bam_cigar_op(cigar2[i]) == BAM_CMATCH && bam_cigar_op(cigar2[i-1]) == BAM_CPAD && bam_cigar_op(cigar2[i-2]) == BAM_CMATCH)
					cigar2[i] += cigar2[i-2], cigar2[i-2] = cigar2[i-1] = 0;
			for (i = k = 0; i < n2; ++i)
				if (cigar2[i]) cigar2[k++] = cigar2[i];
			n2 = k;
			replace_cigar(b, n2, cigar2);
			/* translate the position through posmap */
			b->core.pos = posmap[b->core.pos];
		}
		bam_write1(out, b);
	}
	free(r.s); free(q.s); free(posmap);
	bam_destroy1(b);
	bam_header_destroy(h);
	return 0;
}
Beispiel #24
0
/*
 * Count mate-specific read names in the BAM file fn.
 *
 * For every record the l_qname bytes of the query name are copied into a
 * scratch buffer and followed by "/2" (records flagged BAM_FREAD2) or "/1"
 * (all others) plus a terminating NUL; the first l_qname + 2 bytes are then
 * passed to inc_hash_table().  Progress is printed every million records.
 * Exits the process if the file cannot be opened.
 */
hash_table* hash_ids(const char* fn)
{
    fprintf(stderr, "hashing ... \n");

    hash_table* table = create_hash_table();

    samfile_t* bam_in = samopen(fn, "rb", NULL);
    if (!bam_in) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    bam1_t* rec = bam_init1();
    uint32_t seen = 0;

    /* Scratch buffer for the suffixed name; grown on demand, never shrunk. */
    char* key = NULL;
    size_t key_cap = 0;

    for (;;) {
        if (samread(bam_in, rec) < 0)
            break;

        ++seen;
        if (seen % 1000000 == 0)
            fprintf(stderr, "\t%d reads\n", seen);

        int name_len = rec->core.l_qname;

        if (key_cap < name_len + 3) {
            key_cap = name_len + 3;
            key = realloc(key, key_cap);
        }

        memcpy(key, bam1_qname(rec), name_len);

        /* Only the mate digit differs between the two cases. */
        key[name_len]     = '/';
        key[name_len + 1] = (rec->core.flag & BAM_FREAD2) ? '2' : '1';
        key[name_len + 2] = '\0';

        inc_hash_table(table, key, name_len + 2);
    }

    free(key);

    bam_destroy1(rec);
    samclose(bam_in);

    fprintf(stderr, "done.\n");
    return table;
}
Beispiel #25
0
/*
 * Check that the CIGAR cursor yields the expected spans for selected
 * records of testdata/RUM.sam.
 *
 * Each case names a zero-based record number and the spans (start, end)
 * expected from it, in order; after the last expected span next_span()
 * must return 0.  Cases are listed in increasing record order because the
 * file is read once from the start.  The SAM handle and record buffer are
 * released before returning.
 */
void test_cigar_to_spans() {
  const char *sam_filename = "testdata/RUM.sam";
  samfile_t *samfile = samopen(sam_filename, "r", NULL);
  bam1_t *rec = bam_init1();

  struct SpanAssertion {
    int read_num;
    int num_spans;
    struct Span spans[10];
  };

  struct SpanAssertion cases[] = {
    { 102, 1, { { 12465667, 12465724 } } },
    { 104, 1, { { 2095233, 2095289 } } },
    { 128, 1, { { 152316, 152373 } } },
    { 162, 1, { { 14232813, 14232886 } } },
    { 172, 2, { { 3619619, 3619627 },
                { 3619984, 3620048  } } },
    { 642, 1, { { 15291546, 15291622 } } },
    { 670, 2, { { 3950665, 3950724 },
                { 3951436, 3951453 } } }
  };

  int num_cases = sizeof(cases) / sizeof(struct SpanAssertion);
  int read_num = 0;
  int case_num = 0;
  CigarCursor curs;

  if (samfile == NULL) {
    /* Report the missing fixture instead of handing NULL to samread(). */
    assert_equals(0, 1, "Could not open sam file");
    bam_destroy1(rec);
    return;
  }

  while (case_num < num_cases &&
         samread(samfile, rec) > 0) {

    if (cases[case_num].read_num == read_num) {

      int num_spans = cases[case_num].num_spans;
      Span *span;

      init_cigar_cursor(&curs, rec);

      for (span = cases[case_num].spans; span < cases[case_num].spans + num_spans; span++) {

        assert_equals(1, next_span(&curs), "Should have found a span");
        assert_equals(span->start, curs.start, "Start");
        assert_equals(span->end, curs.end, "End");
      }

      assert_equals(0, next_span(&curs), "No more spans");

      case_num++;
    }
    read_num++;
  }

  if (case_num < num_cases)
    assert_equals(0, 1, "Ran out of records in sam file");

  /* Release what was acquired at the top. */
  bam_destroy1(rec);
  samclose(samfile);
}
Beispiel #26
0
/*
 * Check next_fragment() on a file of single-end reads: the first two calls
 * must each return one read ("seq.1", then "seq.2") and the third must
 * return 0.  Both record buffers and the file handle are released at the
 * end.
 */
void test_next_fragment_single() {
  char *sam_filename = "testdata/test_next_fragment_single.sam";
  samfile_t *samfile = samopen(sam_filename, "r", NULL);
  bam1_t *reads[] = { bam_init1(), bam_init1() };

  int len;

  len = next_fragment(reads, samfile, 2);
  assert_equals(1, len, "Num reads");
  assert_str_equals("seq.1", bam1_qname(reads[0]), "qname");

  len = next_fragment(reads, samfile, 2);
  assert_equals(1, len, "Num reads");
  assert_str_equals("seq.2", bam1_qname(reads[0]), "qname");

  len = next_fragment(reads, samfile, 2);
  assert_equals(0, len, "Num reads");

  samclose(samfile);
  /* The two records were allocated above and were never freed. */
  bam_destroy1(reads[0]);
  bam_destroy1(reads[1]);
}
Beispiel #27
0
/*
 * Fill table T with the reads of the indexed BAM file reads_fn that fall in
 * the intervals of `is` and lie on the interval's strand.
 *
 * T is created with one slot per target and receives a copy of every target
 * name in T->seq_names.  Intervals whose sequence name is unknown to the BAM
 * header are skipped.  failf() is called if the file or its index cannot be
 * opened.
 */
void hash_reads( table* T, const char* reads_fn, interval_stack* is )
{
    samfile_t* bam_in = samopen( reads_fn, "rb", NULL );
    if( !bam_in ) {
        failf( "Can't open bam file '%s'.", reads_fn );
    }

    bam_index_t* bam_idx = bam_index_load( reads_fn );
    if( !bam_idx ) {
        failf( "Can't open bam index '%s.bai'.", reads_fn );
    }

    bam_init_header_hash( bam_in->header );

    /* one table slot and one copied name per target sequence */
    table_create( T, bam_in->header->n_targets );
    T->seq_names = (char**)malloc( sizeof(char*) * bam_in->header->n_targets );
    for( size_t k = 0; k < bam_in->header->n_targets; ++k ) {
        T->seq_names[k] = strdup( bam_in->header->target_name[k] );
    }

    log_puts( LOG_MSG, "hashing reads ... \n" );
    log_indent();

    bam1_t* rec = bam_init1();

    for( interval_stack::iterator iv = is->begin(); iv != is->end(); ++iv ) {
        const int tid = bam_get_tid( bam_in->header, iv->seqname );
        if( tid >= 0 ) {
            bam_iter_t it = bam_iter_query( bam_idx, tid, iv->start, iv->end );

            while( bam_iter_read( bam_in->x.bam, it, rec ) >= 0 ) {
                /* only reads on the interval's strand are counted */
                if( bam1_strand(rec) != iv->strand ) continue;
                table_inc( T, rec );
            }

            bam_iter_destroy( it );
        }
    }

    bam_destroy1( rec );

    log_unindent();
    log_printf( LOG_MSG, "done. (%zu unique reads hashed)\n", T->m );

    bam_index_destroy( bam_idx );
    samclose( bam_in );
}
Beispiel #28
0
/*
 * Usage: <prog> bam_file tid
 *
 * Opens the BAM file and its index, then fetches the whole target `tid`
 * (0 .. INT_MAX) into a pileup buffer with no depth limit.  fetch_func
 * receives each record and pileup_func is invoked by the buffer with
 * &next_pos as its user data.
 *
 * Exits with status 1 on a usage error or if the file or its index cannot
 * be opened.
 */
int main(int argc, char *argv[])
{
     char *progname;

     char *bamfilename;
     int32_t tid;

     samfile_t *bamin;
     bam_index_t *bamidx;
     bam_plbuf_t *buf;
     uint32_t next_pos = 1;

     progname = *argv;
     argv++; argc--;
     if (argc < 2) {
          printf("Usage: %s bam_file tid\n", progname);
          exit(1);
     }
     else {
          bamfilename = argv[0];
          tid = strtol(argv[1], NULL, 10);
     }

     /* try to open bam file */
     bamin = samopen(bamfilename, "rb", NULL);
     if (!bamin) {
          fprintf(stderr, "Error opening bamfile %s\n", bamfilename);
          exit(1);
     }
     /* try to open index */
     bamidx = bam_index_load(bamfilename);
     if (!bamidx) {
          fprintf(stderr, "Error opening index for %s\n", bamfilename);
          exit(1);
     }

     /* The record buffer formerly allocated here was never used: records
      * are delivered to fetch_func by bam_fetch(). */
     buf = bam_plbuf_init(&pileup_func, &next_pos);
     /* disable maximum pileup depth */
     bam_plp_set_maxcnt(buf->iter, INT_MAX);
     bam_fetch(bamin->x.bam, bamidx,
               tid, 0, INT_MAX,
               buf, &fetch_func);
     bam_plbuf_push(0, buf);    /* finish pileup */

     bam_plbuf_destroy(buf);
     bam_index_destroy(bamidx);
     samclose(bamin);
     return 0;
}
Beispiel #29
0
/*
 * Usage: <prog> in.bam out.bam < ids
 *
 * Copies in.bam to out.bam, leaving out the records whose zero-based
 * ordinal appears on standard input.  Ids are read one at a time and are
 * expected in increasing order: the next id is read only after the current
 * one has been matched.
 *
 * If standard input runs out, all remaining records are copied.  If an id
 * cannot be parsed, copying stops at that point.
 *
 * Returns 0 on success and -1 on a usage error, if a file cannot be opened,
 * or if no id could be read at all.
 */
int main(int argc, char** argv)
{
    if(argc < 3) {
        printf("No input nor output files provided");
        return -1;
    }

    bamFile in = bam_open(argv[1], "r");
    if (in == NULL) {
        printf("opening input file failed");
        return -1;
    }

    bamFile out = bam_open(argv[2], "w");
    if (out == NULL) {
        printf("opening output file failed");
        bam_close(in);
        return -1;
    }

    bam_header_t* header = bam_header_read(in);
    if(bam_header_write(out, header) < 0) {
        printf("writing header failed");
    }

    bam1_t* b = bam_init1();
    int status = 0;

    long nextPrunedId;
    /* scanf() returns EOF, not 0, on empty input, so test for exactly one
     * converted item; otherwise nextPrunedId would be used uninitialised. */
    if(scanf("%ld", &nextPrunedId) != 1) {
        printf("warning: no ids provided");
        status = -1;
    } else {
        long id = 0;
        while (bam_read1(in, b) >= 0) {
            // write BAM back
            if (nextPrunedId != id++) {
                bam_write1(out, b);
            } else {
                /* A parse failure (0) stops the copy.  At end of input
                 * scanf() returns EOF and leaves nextPrunedId unchanged, so
                 * it can no longer match and the remaining records are
                 * written. */
                if(scanf("%ld", &nextPrunedId) == 0) {
                    break;
                }
            }
        }
    }

    // closing all resources
    bam_header_destroy(header);
    bam_close(in);
    bam_close(out);
    bam_destroy1(b);
    return status;
}
Beispiel #30
0
/*
 * Usage: showbam <in.bam>
 *
 * Calls fetch_func() on every record of the given BAM file.
 * Returns 1 if no file name was given or the file cannot be opened,
 * 0 otherwise.
 */
int main(int argc, char *argv[])
{
    samfile_t *fp;
    if (argc < 2) {
        /* argv[1] is NULL when no argument was given. */
        fprintf(stderr, "Usage: showbam <in.bam>\n");
        return 1;
    }
    if ((fp = samopen(argv[1], "rb", 0)) == 0) {
        fprintf(stderr, "showbam: Fail to open BAM file %s\n", argv[1]);
        return 1;
    }
    bam1_t *b = bam_init1();
    while (samread(fp, b) >= 0) fetch_func(b);
    bam_destroy1(b);
    samclose(fp);
    return 0;
}
}