Esempio n. 1
0
/*
 * Sanity-check the BAM files belonging to OUTPUT_PREFIX by re-reading them
 * two records at a time and asserting that each pair belongs in that file.
 *
 *   OUTPUT_PREFIX    prefix of "<prefix>_splitread.bam" / "<prefix>_lowqual.bam"
 *   WRITE_SPLITREAD  non-zero: verify the split-read file
 *   WRITE_LOWQ       non-zero: verify the low-quality file
 *   MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES
 *                    forwarded unchanged to the b2g_* predicates
 *
 * Every check is an assert(), so nothing is verified when NDEBUG is defined.
 * A file that cannot be opened, or that ends in the middle of a pair, trips an
 * assertion; with assertions disabled it is skipped / scanning stops instead
 * of reading through a NULL handle or comparing against a stale mate.
 */
static void _check_quality(char *OUTPUT_PREFIX, int WRITE_LOWQ, int WRITE_SPLITREAD, int MAPPING_QUALITY, int MIN_ALIGNED_PCT, int IGNORE_DUPLICATES) {
  bam1_t *b1 = bam_init1(), *b2 = bam_init1();

  if (WRITE_SPLITREAD) {
    samfile_t *split_file = b2g_samfile_open("%s_splitread.bam", "rb", 0, OUTPUT_PREFIX);
    assert(split_file != NULL);
    if (split_file != NULL) {
      while (-1 < samread(split_file, b1)) {
        /* Records are consumed in pairs; without the mate b2 would hold old data. */
        int mate_status = samread(split_file, b2);
        assert(-1 < mate_status);
        if (mate_status < 0) break;
        assert(b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
      }
      samclose(split_file);
    }
  }

  if (WRITE_LOWQ) {
    samfile_t *lowq_file = b2g_samfile_open("%s_lowqual.bam", "rb", 0, OUTPUT_PREFIX);
    assert(lowq_file != NULL);
    if (lowq_file != NULL) {
      while (-1 < samread(lowq_file, b1)) {
        int mate_status = samread(lowq_file, b2);
        assert(-1 < mate_status);
        if (mate_status < 0) break;
        if (WRITE_SPLITREAD) {
          /* Split pairs have their own file, so none may appear here. */
          assert(!b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
          assert(!b2g_bams_highq(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
        }
        else {
          /* Without a split-read file, a high-quality pair is tolerated only if it is split. */
          assert(!b2g_bams_highq(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES) || b2g_bam_pair_split(b1, b2, MAPPING_QUALITY, MIN_ALIGNED_PCT, IGNORE_DUPLICATES));
        }
      }
      samclose(lowq_file);
    }
  }

  bam_destroy1(b1);
  bam_destroy1(b2);
}
// Open every input BAM, optionally seek to a per-file start offset, and prime
// the merge queue with the first record of each file.
//
//   bam_fnames    paths of the BAM files to merge
//   file_offsets  optional start offsets passed to bgzf_seek(); they are used
//                 only when exactly one offset per file was supplied, and only
//                 for entries greater than zero
//
// Terminates the process (exit(1)) when a file cannot be opened or when no
// file yields a record. An empty file list leaves the object empty.
BamMerge::BamMerge(const vector<string>& bam_fnames,
		   vector<int64_t> file_offsets) :
  _bam_fnames(bam_fnames),
  _lines(less_bam(true)),
  _last_id(0)
{
  if (bam_fnames.empty())
    return;

  const bool have_offsets = (bam_fnames.size() == file_offsets.size());

  for (size_t i = 0; i < _bam_fnames.size(); ++i) {
    const char* fname = _bam_fnames[i].c_str();
    samfile_t* fp = samopen(fname, "rb", 0);
    if (fp==0) {
      warn_msg(ERR_BAM_OPEN, fname);
      exit(1);
    }

    if (have_offsets && file_offsets[i] > 0)
      bgzf_seek(fp->x.bam, file_offsets[i], SEEK_SET);

    bam1_t* b = bam_init1();
    if (samread(fp, b) > 0) {
      _src_files.push_back(fp);
      CBamLine brec(_lines.size(), b, fp->header);
      _lines.push(brec);
    }
    else {
      // This file contributes nothing and is not kept in _src_files, so
      // release the handle here as well as the unused record.
      bam_destroy1(b);
      samclose(fp);
    }
  }

  if (_lines.size() == 0) {
    warn_msg("Warning: no input BAM records found.\n");
    exit(1);
  }
}
Esempio n. 3
0
void samToOpenBed(char *samIn, FILE *f)
/* Like samToOpenBed, but the output is the already open file f. */
{
    samfile_t *sf = samopen(samIn, "r", NULL);
    bam_header_t *bamHeader = sf->header;
    bam1_t one;
    ZeroVar(&one);
    int err;
    while ((err = samread(sf, &one)) >= 0)
    {
        int32_t tid = one.core.tid;
        if (tid < 0)
            continue;
        char *chrom = bamHeader->target_name[tid];
        // Approximate here... can do better if parse cigar.
        int start = one.core.pos;
        int size = one.core.l_qseq;
        int end = start + size;
        boolean isRc = (one.core.flag & BAM_FREVERSE);
        char strand = '+';
        if (isRc)
        {
            strand = '-';
            reverseIntRange(&start, &end, bamHeader->target_len[tid]);
        }
        fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
    }
    if (err < 0 && err != -1)
        errnoAbort("samread err %d", err);
    samclose(sf);
}
Esempio n. 4
0
/*
 * Build a hash table counting the read names found in the BAM file `fn`.
 *
 * Every record's query name (bam1_qname, l_qname bytes) is passed to
 * inc_hash_table().  Progress is reported on stderr every million records.
 * Exits with status 1 if the file cannot be opened.
 *
 * Returns the table obtained from create_hash_table().
 */
hash_table* hash_ids(const char* fn)
{
    fprintf(stderr, "hashing ... \n");

    hash_table* T = create_hash_table();

    samfile_t* f = samopen(fn, "rb", NULL);
    if (f == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    bam1_t* b = bam_init1();

    uint32_t n = 0;

    while (samread(f, b) >= 0) {
        if (++n % 1000000 == 0) {
            /* n is unsigned; print it with a matching conversion */
            fprintf(stderr, "\t%u reads\n", (unsigned)n);
        }

        inc_hash_table(T, bam1_qname(b), b->core.l_qname);
    }

    bam_destroy1(b);
    samclose(f);

    fprintf(stderr, "done.\n");
    return T;
}
Esempio n. 5
0
/*
 * Compact `buf` and top it up with records read from `in`.
 *
 * Steps, in order:
 *   1. Drop the leading run of cells whose record pointer is NULL (only the
 *      leading run; later empty cells stay), or empty the buffer when every
 *      cell is empty.
 *   2. Recompute min_rpos, the smallest non-negative rpos among kept cells.
 *   3. Read records, appending a duplicate (bam_dup1) of each one, until input
 *      ends or the capacity rule below stops the loop.
 *
 * buf->x is reset to -1 and then set to the index of the first newly appended
 * record whose tid differs from last_tid (the tid of cell 0, or of the first
 * record read when the buffer was empty).  If the loop stopped on the capacity
 * rule without such a change, buf->x becomes buf->n; if input ended without a
 * change it stays -1.
 *
 * Returns buf->n, the number of cells in use after filling.
 *
 * NOTE(review): the realloc() result is stored without a NULL check.
 */
static int fill_buf(samfile_t *in, buffer_t *buf)
{
	int i, ret, last_tid, min_rpos = 0x7fffffff, capacity;
	bam1_t *b = bam_init1(); // scratch record; each kept record is a copy of it
	bam1_core_t *c = &b->core;
	// squeeze out the empty cells at the beginning
	for (i = 0; i < buf->n; ++i)
		if (buf->buf[i].b) break;
	if (i < buf->n) { // squeeze
		if (i > 0) {
			memmove(buf->buf, buf->buf + i, sizeof(elem_t) * (buf->n - i));
			buf->n = buf->n - i;
		}
	} else buf->n = 0;
	// calculate min_rpos
	for (i = 0; i < buf->n; ++i) {
		elem_t *e = buf->buf + i;
		if (e->b && e->rpos >= 0 && e->rpos < min_rpos)
			min_rpos = buf->buf[i].rpos;
	}
	// fill the buffer
	buf->x = -1;
	last_tid = buf->n? buf->buf[0].b->core.tid : -1;
	capacity = buf->n + BLOCK_SIZE; // soft limit; may be raised inside the loop
	while ((ret = samread(in, b)) >= 0) {
		elem_t *e;
		uint8_t *qual = bam1_qual(b);
		int is_mapped;
		if (last_tid < 0) last_tid = c->tid;
		if (c->tid != last_tid) {
			// remember where the first record with a different tid will be stored
			if (buf->x < 0) buf->x = buf->n;
		}
		if (buf->n >= buf->max) { // enlarge
			buf->max = buf->max? buf->max<<1 : 8;
			buf->buf = (elem_t*)realloc(buf->buf, sizeof(elem_t) * buf->max);
		}
		e = &buf->buf[buf->n++];
		e->b = bam_dup1(b);
		e->rpos = -1; e->score = 0;
		// score = sum of (quality + 1) over all bases, scaled by 1/sqrt(length + 1)
		for (i = 0; i < c->l_qseq; ++i) e->score += qual[i] + 1;
		e->score = (double)e->score / sqrt(c->l_qseq + 1);
		// a record counts as mapped only with a tid inside the header and the unmapped flag clear
		is_mapped = (c->tid < 0 || c->tid >= in->header->n_targets || (c->flag&BAM_FUNMAP))? 0 : 1;
		if (!is_mapped) e->score = -1;
		if (is_mapped && (c->flag & BAM_FREVERSE)) {
			e->rpos = b->core.pos + bam_calend(&b->core, bam1_cigar(b));
			if (min_rpos > e->rpos) min_rpos = e->rpos;
		}
		if (buf->n >= capacity) {
			// keep reading another block while the current mapped record does not start after min_rpos
			if (is_mapped && c->pos <= min_rpos) capacity += BLOCK_SIZE;
			else break;
		}
	}
	// ret >= 0 means the loop ended through the break above, not at end of input
	if (ret >= 0 && buf->x < 0) buf->x = buf->n;
	bam_destroy1(b);
	return buf->n;
}
Esempio n. 6
0
/*
 * Count the records in the BAM file at `path`.
 *
 * Returns the number of records read, or 0 when the file cannot be opened.
 */
static int _count_reads(char *path) {
  int count = 0;
  /* b2g_samfile_open() takes a printf-style name format followed by its
   * arguments, so the path is passed as data, never as the format itself. */
  samfile_t *bamfile = b2g_samfile_open("%s", "rb", 0, path);
  if (!bamfile) return 0;
  bam1_t *bam = bam_init1();
  while (-1 < samread(bamfile, bam)) count++;
  bam_destroy1(bam);
  samclose(bamfile);
  return count;
}
Esempio n. 7
0
/**
 * Read the next alignment record from the opened SAM/BAM file.
 *
 * @return a record freshly obtained from bam_init1(); this function never
 *         frees it.  The record is returned even when nothing could be read,
 *         in which case end_of_file has been set.
 * @throws SAM_IO_Error (file_not_opened) when no file has been opened.
 */
bam1_t * SAM_istream::read() throw (SAM_IO_Error) {
	if (sam_file == NULL) {
		throw SAM_IO_Error(SAM_IO_Error::file_not_opened, "tried to read from a not previously opened file");
	}
	bam1_t * b = bam_init1();
	int bytes = samread(sam_file,b);
	// -1 is a clean end of file; lower values signal a failed read.
	// Either way no further record is available, so stop the stream.
	if (bytes < 0)
		end_of_file = true;
	return b;
}
Esempio n. 8
0
/*
 * Walk testdata/RUM.sam and, for selected record numbers (0-based), check
 * that the CIGAR cursor yields exactly the expected spans.
 * Fails if the file cannot be opened or ends before all cases were seen.
 */
void test_cigar_to_spans() {
  const char *sam_filename = "testdata/RUM.sam";
  samfile_t *samfile = samopen(sam_filename, "r", NULL);
  if (samfile == NULL) {
    assert_equals(0, 1, "Could not open sam file");
    return;
  }
  bam1_t *rec = bam_init1();

  struct SpanAssertion {
    int read_num;
    int num_spans;
    struct Span spans[10];
  };

  /* Cases must be listed in increasing read_num order: the file is scanned once. */
  struct SpanAssertion cases[] = {
    { 102, 1, { { 12465667, 12465724 } } },
    { 104, 1, { { 2095233, 2095289 } } },
    { 128, 1, { { 152316, 152373 } } },
    { 162, 1, { { 14232813, 14232886 } } },
    { 172, 2, { { 3619619, 3619627 },
                { 3619984, 3620048  } } },
    { 642, 1, { { 15291546, 15291622 } } },
    { 670, 2, { { 3950665, 3950724 },
                { 3951436, 3951453 } } }
  };

  int num_cases = sizeof(cases) / sizeof(cases[0]);
  int read_num = 0;
  int case_num = 0;
  CigarCursor curs;
  while (case_num < num_cases &&
         samread(samfile, rec) > 0) {

    if (cases[case_num].read_num == read_num) {

      int num_spans = cases[case_num].num_spans;
      Span *span;

      init_cigar_cursor(&curs, rec);

      for (span = cases[case_num].spans; span < cases[case_num].spans + num_spans; span++) {

        assert_equals(1, next_span(&curs), "Should have found a span");
        assert_equals(span->start, curs.start, "Start");
        assert_equals(span->end, curs.end, "End");
      }

      assert_equals(0, next_span(&curs), "No more spans");

      case_num++;
    }
    read_num++;
  }

  if (case_num < num_cases)
    assert_equals(0, 1, "Ran out of records in sam file");

  /* release the record and the file handle so repeated test runs do not leak */
  bam_destroy1(rec);
  samclose(samfile);
}
Esempio n. 9
0
/*
 * Build a hash table counting mate-specific read names found in BAM file `fn`.
 *
 * For each record the key is the l_qname bytes of the query name followed by
 * "/2" when BAM_FREAD2 is set and "/1" otherwise; the key is handed to
 * inc_hash_table() with length l_qname + 2.  Progress is reported on stderr
 * every million records.  Exits with status 1 if the file cannot be opened
 * or the key buffer cannot be grown.
 *
 * Returns the table obtained from create_hash_table().
 */
hash_table* hash_ids(const char* fn)
{
    fprintf(stderr, "hashing ... \n");

    hash_table* T = create_hash_table();

    samfile_t* f = samopen(fn, "rb", NULL);
    if (f == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    bam1_t* b = bam_init1();

    uint32_t n = 0;

    /* reusable key buffer, grown on demand */
    char* qname = NULL;
    size_t qname_size = 0;

    while (samread(f, b) >= 0) {
        if (++n % 1000000 == 0) {
            /* n is unsigned; print it with a matching conversion */
            fprintf(stderr, "\t%u reads\n", (unsigned)n);
        }

        size_t name_len = (size_t)b->core.l_qname;
        size_t needed = name_len + 3;   /* name + '/' + mate digit + NUL */

        if (qname_size < needed) {
            /* keep the old pointer until the new one is known to be valid */
            char* grown = realloc(qname, needed);
            if (grown == NULL) {
                fprintf(stderr, "out of memory while hashing %s\n", fn);
                exit(1);
            }
            qname = grown;
            qname_size = needed;
        }

        memcpy(qname, bam1_qname(b), name_len);
        qname[name_len]     = '/';
        qname[name_len + 1] = (b->core.flag & BAM_FREAD2) ? '2' : '1';
        qname[name_len + 2] = '\0';

        inc_hash_table(T, qname, b->core.l_qname + 2);
    }

    free(qname);

    bam_destroy1(b);
    samclose(f);

    fprintf(stderr, "done.\n");
    return T;
}
Esempio n. 10
0
/*
 * showbam: open the BAM file named by the first argument and hand every
 * record to fetch_func().
 *
 * Returns 0 on success, 1 when the argument is missing or the file cannot
 * be opened.
 */
int main(int argc, char *argv[])
{
    samfile_t *fp;
    if (argc < 2) {
        /* without this guard argv[1] would be NULL and passed straight to samopen() */
        fprintf(stderr, "Usage: showbam <in.bam>\n");
        return 1;
    }
    if ((fp = samopen(argv[1], "rb", 0)) == 0) {
        fprintf(stderr, "showbam: Fail to open BAM file %s\n", argv[1]);
        return 1;
    }
    bam1_t *b = bam_init1();
    while (samread(fp, b) >= 0) fetch_func(b);
    bam_destroy1(b);
    samclose(fp);
    return 0;
}
Esempio n. 11
0
void edwSamRepeatAnalysis(char *inSam, char *outRa)
/* edwSamRepeatAnalysis - Analyze result of alignment vs. RepeatMasker type libraries.. */
{
/* Go through sam file, filling in hiLevelHash with count of each hi level repeat class we see. */
struct hash *hiLevelHash = hashNew(0);
samfile_t *sf = samopen(inSam, "r", NULL);
bam_header_t *bamHeader = sf->header;
bam1_t one;
ZeroVar(&one);
int err;
long long hit = 0, miss = 0;
while ((err = samread(sf, &one)) >= 0)
    {
    int32_t tid = one.core.tid;
    if (tid < 0)
	{
	++miss;
        continue;
	}
    ++hit;

    /* Parse out hiLevel classification from target,  which is something like 7SLRNA#SINE/Alu 
     * from which we'd want to extract SINE.  The '/' is not present in all input. */
    char *target = bamHeader->target_name[tid];
    char *hashPos = strchr(target, '#');
    if (hashPos == NULL)
        errAbort("# not found in target %s", target);
    char *hiLevel = cloneString(hashPos + 1);
    char *slashPos = strchr(hiLevel, '/');
    if (slashPos != NULL)
        *slashPos = 0;

    hashIncInt(hiLevelHash, hiLevel);
    }
samclose(sf);

/* Output some basic stats as well as contents of hash */
FILE *f = mustOpen(outRa, "w");
double invTotal = 1.0 / (hit + miss);
double mapRatio = (double)hit * invTotal;
struct hashEl *hel, *helList = hashElListHash(hiLevelHash);
slSort(&helList, hashElCmp);
for (hel = helList; hel != NULL; hel = hel->next)
    {
    double hitRatio = ptToInt(hel->val) * invTotal;
    fprintf(f, "%s %g\n", hel->name, hitRatio);
    }
fprintf(f, "total %g\n", mapRatio);
carefulClose(&f);
}
/*
 * Feed every record of `fp` into a pileup buffer that invokes `func`.
 *
 *   fp         open alignment file to read from
 *   mask       passed to bam_plbuf_set_mask()
 *   func       pileup callback
 *   func_data  opaque pointer handed to the pileup buffer for the callback
 *
 * Reading stops at the first negative samread() status.  Always returns 0.
 */
int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
{
	bam1_t *rec = bam_init1();
	bam_plbuf_t *plbuf = bam_plbuf_init(func, func_data);

	bam_plbuf_set_mask(plbuf, mask);
	for (;;) {
		if (samread(fp, rec) < 0)
			break;
		bam_plbuf_push(rec, plbuf);
	}
	/* a null record tells the buffer that the input is finished */
	bam_plbuf_push(0, plbuf);

	bam_plbuf_destroy(plbuf);
	bam_destroy1(rec);
	return 0;
}
Esempio n. 13
0
/*
 * Regression check on testdata/cigar_bug.sam: for the listed record numbers
 * (0-based) every span the CIGAR cursor does yield must match the expected
 * one, and no span may remain afterwards.  A span the cursor fails to yield
 * is tolerated.  Fails if the file cannot be opened or ends before all cases
 * were seen.
 */
void test_cigar_to_spans2() {

  const char *sam_filename = "testdata/cigar_bug.sam";
  samfile_t *samfile = samopen(sam_filename, "r", NULL);
  if (samfile == NULL) {
    assert_equals(0, 1, "Could not open sam file");
    return;
  }
  bam1_t *rec = bam_init1();

  struct SpanAssertion {
    int read_num;
    int num_spans;
    struct Span spans[10];
  };

  struct SpanAssertion cases[] = {
    { 0, 2, { { 46383142, 46383163 },
              { 46384677, 46384749 } } }
  };

  int num_cases = sizeof(cases) / sizeof(cases[0]);
  int read_num = 0;
  int case_num = 0;
  CigarCursor curs;
  while (case_num < num_cases &&
         samread(samfile, rec) > 0) {

    if (cases[case_num].read_num == read_num) {

      int num_spans = cases[case_num].num_spans;
      Span *span;

      init_cigar_cursor(&curs, rec);

      for (span = cases[case_num].spans; span < cases[case_num].spans + num_spans; span++) {

        if (next_span(&curs)) {
          assert_equals(span->start, curs.start, "Start");
          assert_equals(span->end, curs.end, "End");
        }

      }

      assert_equals(0, next_span(&curs), "No more spans");

      case_num++;
    }
    read_num++;
  }

  /* without this the test would pass vacuously on an empty input file */
  if (case_num < num_cases)
    assert_equals(0, 1, "Ran out of records in sam file");

  bam_destroy1(rec);
  samclose(samfile);
}
Esempio n. 14
0
/*
 * Advance the iterator by one alignment.
 *
 * The iterator's alignment object is created on first use and then reused
 * for every subsequent read.  On a positive samread() status *s_alignment
 * points at that object; otherwise it is set to NULL.
 *
 * Returns the samread() status unchanged.
 */
int gt_samfile_iterator_next(GtSamfileIterator *s_iter,
                             GtSamAlignment **s_alignment)
{
  int status;

  if (!s_iter->current_alignment)
    s_iter->current_alignment = gt_sam_alignment_new(s_iter->alphabet);

  status = samread(s_iter->samfile, s_iter->current_alignment->s_alignment);
  *s_alignment = (status > 0) ? s_iter->current_alignment : NULL;
  return status;
}
int main(int argc, char* argv[]) {
    samfile_t *ifile = NULL, *ofile = NULL;
    bam1_t *read = bam_init1();
    int keep = 0;
    char *p = NULL;

    //Open input file, either SAM or BAM
    p = strrchr(argv[1], '.');
    if(strcmp(p, ".bam") == 0) {
        ifile = samopen(argv[1], "rb", NULL);
    } else {
        ifile = samopen(argv[1], "r", NULL);
    }

    bam_header_t *head = ifile->header;

    //Open output file
    // ofile = samopen("AND_type.bam", "wb", ifile->header);
    ofile = samopen(argv[2], "wb", ifile->header);


    //Iterate through the lines
    while(samread(ifile, read) > 1) {
        keep = 0;
        //Is the read's mate on the same chromosome/contig?
        if(read->core.tid == read->core.mtid) {
            //Are the mates on opposite strands?
            if(read->core.flag & BAM_FREVERSE && !(read->core.flag & BAM_FMREVERSE)) {
                if(read->core.pos < read->core.mpos) {
                    // Are mates 500 bp or less from the ends?
                    if (read-> core.pos <= 500 && read->core.mpos > head->target_len[read->core.tid] - 500)
                        keep=1;
                }
            } else if(!(read->core.flag & BAM_FREVERSE) && read->core.flag & BAM_FMREVERSE) {
                if(read->core.mpos < read->core.pos) {
                    if (read-> core.mpos <= 500 && read->core.pos > head->target_len[read->core.tid] - 500)
                        keep=1;
                }
            }
        }
        if(keep) samwrite(ofile, read);
    }
    bam_destroy1(read);
    samclose(ifile);
    samclose(ofile);
    return 0;
}
Esempio n. 16
0
/*
 * Distribute every record of `fin` over the output files in `foutList`.
 *
 * A record with a non-negative tid is written to foutList[tid]; any other
 * record goes to foutList[n_targets], the slot just past the last target.
 *
 * Returns the number of records handled; the count is negated when reading
 * stopped on a status below -1 instead of a plain end of file.
 */
int _walk_through_sam_and_split(samfile_t * fin, samfile_t **foutList)
{
    bam1_t *rec = bam_init1();
    int status;
    int n_records = 0;

    for (status = samread(fin, rec); status >= 0; status = samread(fin, rec)) {
        if (rec->core.tid > -1)
            samwrite(foutList[rec->core.tid], rec);
        else
            samwrite(foutList[fin->header->n_targets], rec);
        n_records++;
    }
    bam_destroy1(rec);

    if (status < -1)
        return -1 * n_records;
    return n_records;
}
Esempio n. 17
0
static void scanSam(char *samIn, FILE *f, struct genomeRangeTree *grt, long long *retHit, 
    long long *retMiss,  long long *retTotalBasesInHits)
/* Scan through sam file doing several things:counting how many reads hit and how many 
 * miss target during mapping phase, copying those that hit to a little bed file, and 
 * also defining regions covered in a genomeRangeTree. */
{
samfile_t *sf = samopen(samIn, "r", NULL);
bam_header_t *bamHeader = sf->header;
bam1_t one;
ZeroVar(&one);
int err;
long long hit = 0, miss = 0, totalBasesInHits = 0;
while ((err = samread(sf, &one)) >= 0)
    {
    int32_t tid = one.core.tid;
    if (tid < 0)
	{
	++miss;
        continue;
	}
    ++hit;
    char *chrom = bamHeader->target_name[tid];
    // Approximate here... can do better if parse cigar.
    int start = one.core.pos;
    int size = one.core.l_qseq;
    int end = start + size;	
    totalBasesInHits += size;
    boolean isRc = (one.core.flag & BAM_FREVERSE);
    char strand = '+';
    if (isRc)
	{
	strand = '-';
	reverseIntRange(&start, &end, bamHeader->target_len[tid]);
	}
    if (start < 0) start=0;
    if (f != NULL)
	fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
    genomeRangeTreeAdd(grt, chrom, start, end);
    }
if (err < 0 && err != -1)
    errnoAbort("samread err %d", err);
samclose(sf);
*retHit = hit;
*retMiss = miss;
*retTotalBasesInHits = totalBasesInHits;
}
Esempio n. 18
0
/*
 * Read the next record from `samio` into `sam`.
 *
 * sam->b is always replaced by a fresh record.  When an end offset is set
 * (bam_end_vfo > 0) and the current file position has reached it, nothing
 * is read.  On success the name, sequence and quality strings of `sam` are
 * refreshed from the record, and the read is reverse-complemented when
 * BAM_FREVERSE is set.
 *
 * Returns 1 when a record was read, -1 otherwise.
 */
int32_t
tmap_sam_io_read(tmap_sam_io_t *samio, tmap_sam_t *sam)
{
  // drop the previous record, if any, and start from an empty one
  if(NULL != sam->b) {
      bam_destroy1(sam->b);
  }
  sam->b = bam_init1();

  // honour the optional end bam virtual file offset
  if (samio->bam_end_vfo > 0) {
      BGZF* bgzf_fp = samio->fp->x.bam;
      if (bam_tell(bgzf_fp) >= samio->bam_end_vfo) {
         fprintf(stderr, "stopping at bam virtual file offset %lu\n", samio->bam_end_vfo);
         return -1;
      }
  }

  if(samread(samio->fp, sam->b) <= 0) {
      return -1;
  }

  char *qname;
  int32_t pos, n;

  // name
  qname = bam1_qname(sam->b);
  n = strlen(qname);
  tmap_sam_io_update_string(&sam->name, qname, n);
  sam->name->s[n] = '\0';

  // seq and qual: qualities are copied raw first, then converted in place
  n = sam->b->core.l_qseq;
  tmap_sam_io_update_string(&sam->seq, NULL, n);
  tmap_sam_io_update_string(&sam->qual, (char*)bam1_qual(sam->b), n);
  for(pos=0;pos<n;pos++) {
      sam->seq->s[pos] = bam_nt16_rev_table[bam1_seqi(bam1_seq(sam->b), pos)];
      sam->qual->s[pos] = QUAL2CHAR(sam->qual->s[pos]);
  }
  sam->seq->s[n] = sam->qual->s[n] = '\0';

  // reverse compliment if necessary
  if((sam->b->core.flag & BAM_FREVERSE)) {
      tmap_sam_reverse_compliment(sam);
  }
  return 1;
}
Esempio n. 19
0
/*
 * Feed every record of `fp` into a pileup buffer that invokes `func`.
 *
 *   mask  flag bits to filter on.  A negative value selects
 *         BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP; otherwise
 *         BAM_FUNMAP is added to the given bits.
 *
 * Records carrying any masked flag are marked BAM_FUNMAP before being pushed.
 * Reading stops at the first negative samread() status.  Always returns 0.
 */
int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
{
    bam1_t *rec = bam_init1();
    bam_plbuf_t *plbuf = bam_plbuf_init(func, func_data);
    int status;

    mask = (mask < 0) ? (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)
                      : (mask | BAM_FUNMAP);

    for (status = samread(fp, rec); status >= 0; status = samread(fp, rec)) {
        // bam_plp_push() itself now filters out unmapped reads only
        if (rec->core.flag & mask)
            rec->core.flag |= BAM_FUNMAP;
        bam_plbuf_push(rec, plbuf);
    }
    // a null record tells the buffer that the input is finished
    bam_plbuf_push(0, plbuf);

    bam_plbuf_destroy(plbuf);
    bam_destroy1(rec);
    return 0;
}
Esempio n. 20
0
/*
 * Run `func` on alignments of BAM file `ifn`.
 *
 *   ifn   input BAM path
 *   ofn   optional output BAM path (NULL: no output file is opened)
 *   reg   optional region string; requires an index for `ifn`.  When NULL
 *         every record of the file is visited.
 *   data  opaque pointer forwarded to `func`
 *   func  callback invoked as func(record, in, out, data)
 *
 * Any failure (unopenable file, missing index, unknown reference name,
 * truncated input) prints a message and terminates the process with
 * exit(1).  On success the final read status, -1 (end of data), is returned.
 */
int sam_fetch(char *ifn, char *ofn, char *reg, void *data, sam_fetch_f func) {
	int ret = 0;
	samfile_t *in = samopen(ifn, "rb", 0);
	samfile_t *out = 0;
	if (in == 0) {
		/* in->header is needed below, so a failed open must stop here */
		fprintf(stderr, "[%s:%d] Failed to open \"%s\" for reading.\n",
						__func__, __LINE__, ifn);
		exit(1);
	}
	if (ofn) {
		out = samopen(ofn, "wb", in->header);
		if (out == 0) {
			fprintf(stderr, "[%s:%d] Failed to open \"%s\" for writing.\n",
							__func__, __LINE__, ofn);
			exit(1);
		}
	}

	if (reg) {
		bam_index_t *idx = bam_index_load(ifn);
		if (idx == 0) {
			fprintf(stderr, "[%s:%d] Random alignment retrieval only works for indexed BAM files.\n",
							__func__, __LINE__);
			exit(1);
		}
		int tid, beg, end;
		bam_parse_region(in->header, reg, &tid, &beg, &end);
		if (tid < 0) {
			fprintf(stderr, "[%s:%d] Region \"%s\" specifies an unknown reference name. \n",
							__func__, __LINE__, reg);
			exit(1);
		}
		bam_iter_t iter;
		bam1_t *b = bam_init1();
		iter = bam_iter_query(idx, tid, beg, end);
		while ((ret = bam_iter_read(in->x.bam, iter, b)) >= 0) func(b, in, out, data);
		bam_iter_destroy(iter);
		bam_destroy1(b);
		bam_index_destroy(idx);
	} else {
		bam1_t *b = bam_init1();
		while ((ret = samread(in, b)) >= 0) func(b, in, out, data);
		bam_destroy1(b);
	}
	if (out) samclose(out);
	samclose(in);
			
	if (ret != -1) {					/* truncated is -2 */
		fprintf(stderr, "[%s:%d] Alignment retrieval failed due to truncated file\n",
						__func__, __LINE__);
		exit(1);
	}

	return ret;
}
Esempio n. 21
0
void build_wiggles(const std::string& bam_filename,
                   WiggleProcessor& processor) {
  
    samfile_t *bam_in = samopen(bam_filename.c_str(), "r", NULL);
    general_assert(bam_in != NULL, "Cannot open " + bam_filename + "!");

    bam_hdr_t *header = bam_in->header;
    bool *used = new bool[header->n_targets];
    memset(used, 0, sizeof(bool) * header->n_targets);

    int cur_tid = -1; //current tid;
    HIT_INT_TYPE cnt = 0;
    bam1_t *b = bam_init1();
    Wiggle wiggle;
    while (samread(bam_in, b) >= 0) {
      if (bam_is_unmapped(b)) continue;
      
      if (b->core.tid != cur_tid) {
	if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); }
	cur_tid = b->core.tid;
	wiggle.name = header->target_name[cur_tid];
	wiggle.length = header->target_len[cur_tid];
	wiggle.read_depth.assign(wiggle.length, 0.0);
      }
      add_bam_record_to_wiggle(b, wiggle);
      ++cnt;
      if (cnt % 1000000 == 0) std::cout<< cnt<< std::endl;
    }
    if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); }
    
    for (int32_t i = 0; i < header->n_targets; i++)
      if (!used[i]) {
	wiggle.name = header->target_name[i];
	wiggle.length = header->target_len[i];
	wiggle.read_depth.clear();
	processor.process(wiggle);
      }

    bam_destroy1(b);
    samclose(bam_in);

    delete[] used;
}
Esempio n. 22
0
// Advance to the next record.
//
// Reads sequentially with samread() when no iterator is set, otherwise
// through the iterator.  Returns true and increments the record counter when
// a record was read; returns false when no file is open or the read status
// is negative.
bool
bam_streamer::
next()
{
    if (NULL==_bfp) return false;

    const int status = (NULL == _biter)
                       ? samread(_bfp, _brec._bp)
                       : bam_iter_read(_bfp->x.bam, _biter, _brec._bp);

    _is_record_set = (status >= 0);
    if (! _is_record_set) return false;

    _record_no++;
    return true;
}
Esempio n. 23
0
// Advance to the next record.
//
// Reads sequentially with samread() when no iterator is set, otherwise
// through sam_itr_next().  Returns true and increments the record counter
// when a record was read; returns false when no file is open or the read
// status is negative.
bool
bam_streamer::
next()
{
    if (nullptr == _bfp) return false;

    const int status = (nullptr == _hitr)
                       ? samread(_bfp, _brec._bp)
                       : sam_itr_next(_bfp->file, _hitr, _brec._bp);

    _is_record_set = (status >= 0);
    if (! _is_record_set) return false;

    _record_no++;
    return true;
}
Esempio n. 24
0
/*
 * Run dindel_fetch_func() over every record of `bam_in`, writing to `bam_out`.
 *
 *   bam_in   input BAM path
 *   bam_out  output BAM path; NULL or a name starting with '-' selects stdout
 *   ref      reference path loaded with fai_load()
 *
 * The input header is copied to the output before any record is processed.
 *
 * Returns 0 on success, 1 when the input, the reference or the output cannot
 * be opened (resources opened so far are released first).
 */
int add_dindel(const char *bam_in, const char *bam_out, const char *ref)
{
    data_t_dindel tmp;
    int count = 0;
    bam1_t *b = NULL;

    if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) {
        LOG_FATAL("Failed to open BAM file %s\n", bam_in);
        return 1;
    }
    if ((tmp.fai = fai_load(ref)) == 0) {
        LOG_FATAL("Failed to open reference file %s\n", ref);
        samclose(tmp.in);
        return 1;
    }
    /*warn_old_fai(ref);*/

    if (!bam_out || bam_out[0] == '-') {
        tmp.out = bam_dopen(fileno(stdout), "w");
    } else {
        tmp.out = bam_open(bam_out, "w");
    }
    if (tmp.out == 0) {
        /* the header write below would otherwise go through a null handle */
        LOG_FATAL("Failed to open output BAM file %s\n", bam_out ? bam_out : "-");
        fai_destroy(tmp.fai);
        samclose(tmp.in);
        return 1;
    }
    bam_header_write(tmp.out, tmp.in->header);

    b = bam_init1();
    tmp.tid = -1;
    tmp.hpcount = 0;
    tmp.rlen = 0;
    while (samread(tmp.in, b) >= 0) {
        count++;
        dindel_fetch_func(b, &tmp);
    }
    bam_destroy1(b);

    free(tmp.hpcount);   /* free(NULL) is a no-op, so no guard is needed */
    samclose(tmp.in);
    bam_close(tmp.out);
    fai_destroy(tmp.fai);
    LOG_VERBOSE("Processed %d reads\n", count);
    return 0;
}
Esempio n. 25
0
/*
 * Write to stdout, as SAM text, every record of BAM file `fn` whose query
 * name has the value 1 in hash table `T`.
 *
 * The header text of the input, when present, is echoed to stdout before
 * any record.  Progress is reported on stderr every million records.
 * Exits with status 1 if the input or stdout cannot be opened.
 */
void filter_by_id(const char* fn, hash_table* T)
{
    fprintf(stderr, "filtering ... \n");

    samfile_t* fin = samopen(fn, "rb", NULL);
    if (fin == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    samfile_t* fout = samopen("-", "w", (void*)fin->header);
    if (fout == NULL) {
        fprintf(stderr, "can't open stdout, for some reason.\n");
        exit(1);
    }

    /* a header without text must not be handed to fputs() */
    if (fin->header->text != NULL) {
        fputs(fin->header->text, stdout);
    }

    bam1_t* b = bam_init1();
    uint32_t n = 0;

    while (samread(fin, b) >= 0) {
        if (++n % 1000000 == 0) {
            /* n is unsigned; print it with a matching conversion */
            fprintf(stderr, "\t%u reads\n", (unsigned)n);
        }

        if (get_hash_table(T, bam1_qname(b), b->core.l_qname) == 1) {
            samwrite(fout, b);
        }
    }

    bam_destroy1(b);
    samclose(fout);
    samclose(fin);

    fprintf(stderr, "done.\n");
}
Esempio n. 26
0
/*
 * Open a hard-coded BAM file and print each alignment that is not skipped
 * by __g_skip_aln() as one text line on stdout.
 */
int main()
{	
	samfile_t *in = 0, *out = 0;
        int slx2sngr = 0;
        char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0;
        strcpy(in_mode, "r"); strcpy(out_mode, "w");
        strcat(in_mode, "b");
                printf("start******************************************************\n");
        char *filename = "/ifs1/RD/shaohaojing/DAI/1000306/1000306_HUMgqbRLJDIAAPE_091015_I58_FC42UC6AAXX_L2_HUMgqbRLJDIAAPE_1.fq.bam.sort.bam";
        if ((in = samopen(filename, in_mode, fn_list)) == 0)
        {
                fprintf(stderr, "[main_samview] fail to open file for reading.\n");
                exit(0);
        }
        if (in->header == 0) {
                fprintf(stderr, "[main_samview] fail to read the header.\n");
                exit(0);
        }
        if ((out = samopen(fn_out? fn_out : "-", out_mode, in->header)) == 0) {
                fprintf(stderr, "[main_samview] fail to open file for writing.\n");
                exit(0);
        }

        bam1_t *b = bam_init1();
        int r;
        char *s = NULL;
        while((r = samread(in, b)) >= 0)
        {
                /* A skipped record produces no text: printing or freeing `s`
                 * here would touch an unset or already freed pointer. */
                if (__g_skip_aln(in->header, b))
                        continue;
                s = bam_format1_core(out->header, b, out->type>>2&3);
                printf("%s\n", s);
                free(s);
                s = NULL;
        }
Esempio n. 27
0
/**
 * DATE: 2010-7-29
 * FUNCTION: read the next alignment from the sam/bam file as a text line,
 *           passing over records rejected by __g_skip_aln.
 * PARAMETER: line: receives the formatted alignment text.
 * RETURN: the size of the line when a record was read; -1 when no input is
 *         open or no further record could be read.
 */
int SamCtrl::readline(std::string &line) 
{
	if (m_in == 0) 
		return -1;

	for (;;) 
	{
		// a negative status means nothing more can be read
		if (samread(m_in, m_b) < 0) 
			return -1;

		// filtered-out record: try the next one
		if (__g_skip_aln(m_in->header, m_b)) 
			continue;

		// format the record, copy the text out, then release the temporary buffer
		m_s = bam_format1_core(m_out->header, m_b, m_out->type>>2&3);
		line = m_s;
		free(m_s);
		return line.size();
	}
}
Esempio n. 28
0
/*
 * Run uniform_fetch_func() over every record of `bam_in`, writing to `bam_out`.
 *
 *   bam_in    input BAM path
 *   bam_out   output BAM path; NULL or a name starting with '-' selects stdout
 *   ins_qual  insertion quality; stored as ENCODE_Q(ins_qual + 33)
 *   del_qual  deletion quality; stored as ENCODE_Q(del_qual + 33)
 *
 * The input header is copied to the output before any record is processed.
 *
 * Returns 0 on success, 1 when the input or the output cannot be opened.
 */
int add_uniform(const char *bam_in, const char *bam_out,
                const int ins_qual, const int del_qual)
{
    data_t_uniform tmp;
    uint8_t iq = ENCODE_Q(ins_qual+33);
    uint8_t dq = ENCODE_Q(del_qual+33);
    bam1_t *b = NULL;
    int count = 0;

    if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) {
        LOG_FATAL("Failed to open BAM file %s\n", bam_in);
        return 1;
    }

    tmp.iq = iq;
    tmp.dq = dq;

    if (!bam_out || bam_out[0] == '-') {
        tmp.out = bam_dopen(fileno(stdout), "w");
    } else {
        tmp.out = bam_open(bam_out, "w");
    }
    if (tmp.out == 0) {
        /* the header write below would otherwise go through a null handle */
        LOG_FATAL("Failed to open output BAM file %s\n", bam_out ? bam_out : "-");
        samclose(tmp.in);
        return 1;
    }
    bam_header_write(tmp.out, tmp.in->header);

    b = bam_init1();
    while (samread(tmp.in, b) >= 0) {
        count++;
        uniform_fetch_func(b, &tmp);
    }
    bam_destroy1(b);

    samclose(tmp.in);
    bam_close(tmp.out);
    LOG_VERBOSE("Processed %d reads\n", count);
    return 0;
}
Esempio n. 29
0
/**
* @brief Major file/read parsing function
*
* @param cptr Pointer to the start of the score array for the current chromosome; scores are incremented in place
* @param databl_start Pointer to genomic start positions of data blocks (written; terminated with 0)
* @param databl_end Pointer to genomic end positions of data blocks (written; terminated with 0)
* @param user_args Structure holding all user input (PAIRED, EXTEND, READTHROUGH and COMPRESSION are read here)
* @param cs Pointer to chromosome_size structure holding chromosome dimensions; min_pos, max_pos, min_scorespace and min_indexspace are updated
* @param bam_file Pointer to samfile_t structure holding the file handler
* @param first_call Is this the first call to this function? Cleared once the first usable read has been stored
* @return Structure seq_block with results of the read scanning such as mapmass and file status.
*         file_status values set in this function: 1 = a read on a new chromosome was reached (or first usable read),
*         0 = end of file after processing, -10 = end of file during the first call, -5 = positions not sorted,
*         -4 = read out of bounds (only with pedantic==1), a negative rm.skip from quality_check is passed through,
*         and a samread status below -1 is returned unchanged for a truncated file.
* @details Major function looping through every read in file until a new chromosome starts. Updates cptr scores and returns statistics on reads.
*          The first read of the next chromosome is kept in the static struct fr between calls and written out on the following call.
* @note Very long. Should be refactored?
* @note NOTE(review): several return paths leave the loop without releasing current_read (marked below) - confirm and fix.
* @todo
*/
seq_block_t seq_density(usersize *cptr, uint32_t *databl_start, uint32_t *databl_end, user_arguments_t *user_args,
		chromosome_size_t *cs, samfile_t *bam_file, int *first_call)
/*reads all reads from bam until a new chromosome is reached and sets first read infos stored in the struct fr.
 * returns a flag about the parse status and writes the scores to cptr*/
{
	int abs_gen_end;
	uint32_t nindex=0,*ind_start,*ind_end,max_index=cs->min_indexspace;
	usersize * cptr_beg=cptr;
	seq_block_t bresults={0};
	read_metrics_t rm={0};
	// static: keeps the buffered first read of a chromosome alive across calls
	static buffered_read_t fr={0};
	if(*first_call)fr.cigar=(uint32_t *)Calloc(MAX_NCIGAR,uint32_t);

	int lpos=fr.pos,last_end=fr.pos+fr.l_seq;
	uint32_t BUFFERLIMIT=cs->max_pos+1;//there are no more than this many items calloc'ed
	uint32_t INDEXLIMIT=cs->min_indexspace;
	ind_start=databl_start;ind_end=databl_end;//note the start of pointers

	while(1){//heavy main loop will go through this n=[amount of reads] times!
		// a fresh record is allocated for every iteration and must be released on every exit from it
		bam1_t * current_read=bam_init1();
		/* ######### SCAN BLOCK ################## */
		bresults.file_status=samread(bam_file,current_read);
		if (bresults.file_status == -1){// EOF
			if (*first_call){// EOF
				warning("No compatible read found for this settings!\n");
				if(!bresults.paired && user_args->PAIRED)warning("No proper pair [flag 2] found in this file. \nSet 'paired_only' to FALSE\n");
				bresults.file_status = -10;
				// NOTE(review): current_read is not destroyed on this path
				return bresults;
			}
			#if verbose==1
			printf("EOF detected -> %d read(s) screened!\n",bresults.total_reads);
			#endif
			cs->max_pos=last_end;//set maximal absolute position
			*(databl_end++)=cs->max_pos;//completes the missing end entry to be on par with start
			cs->min_scorespace+=*(databl_end-1)-*(databl_start-1)+1;

			cs->min_indexspace=nindex;
			*databl_end=0;*databl_start=0;//end flag
			databl_start=ind_start;databl_end=ind_end;//reset pointer to the beginning
			free_samio(&fr,current_read);
			bresults.file_status = 0;
			return bresults;
		}else if(bresults.file_status<-1){
			warning("File truncated!\n");
			// NOTE(review): on the first call current_read is not released here
			if (!*first_call)free_samio(&fr,current_read);
			return bresults; // truncated
		}

		/* MAIN QUALITY CHECKPOINT */
		quality_check(&rm,current_read,user_args,&bresults,lpos);
		#if verbose==4
		print_readinfo(&bresults,current_read,&rm,bam_file);
		#endif
		if(rm.skip<0){
			bresults.file_status = rm.skip;
			// NOTE(review): current_read is not destroyed on this path
			return bresults;
		}else if(rm.skip)goto SKIP_READ;
		abs_gen_end=rm.genomic_end+user_args->EXTEND;
		/* ######### END SCAN ######################*/

		/*################  BOUNDARY CHECK ######################*/

		if(fr.chrom_index!=current_read->core.tid || *first_call){//Did we reach the end of the chromosome? If yes save data and return!
			if (!*first_call){
				cs->max_pos=last_end;//set maximal absolute position
				*databl_end++=cs->max_pos;//completes the missing end entry to be on par with start
				cs->min_scorespace+=*(databl_end-1)-*(databl_start-1)+1;
				cs->min_indexspace=nindex;
				*databl_end=0;*databl_start=0;//end flag
				databl_start=ind_start;databl_end=ind_end;//reset pointer to the beginning
			}
			*first_call=0;
			bresults.file_status=1;
			bresults.chrom_index_next=current_read->core.tid;
			// NOTE(review): presumably store_read copies the read into fr and takes care of current_read - confirm ownership
			store_read(&fr,current_read,&rm);
			return bresults;

		} else if(lpos>current_read->core.pos){
			bresults.file_status=-5;//something wrong with the positions
			warning("Last position>current position. File doesn't seem to be sorted!\n");
			free_samio(&fr,current_read);
			return bresults;
		} else if(BUFFERLIMIT<abs_gen_end || abs_gen_end < 0){
			//skip read if sequence out of bounds
			//possibly bad header with wrong chromosome margins or EXTEND too large!
			warning("BUFFER only %d\n But POS: %d cur_seq_len: %d EXTEND: %d -> %d \n GLOBAL %d\n",
					BUFFERLIMIT,current_read->core.pos,rm.read_length,user_args->EXTEND,
					current_read->core.pos+rm.read_length+user_args->EXTEND,abs_gen_end);
			#if pedantic==1
			bresults.file_status=-4;
			// NOTE(review): current_read is not destroyed on this path
			return bresults;
			#endif
			bam_destroy1(current_read);
			continue;
		}
		/*#####################################  WRITE  ###############*/

		cptr+=current_read->core.pos;//align pointer to current position
		if(user_args->READTHROUGH){
			write_density_ungapped(cptr,rm.read_length,&bresults.maxScore);
		}else{
			write_density_gapped(cptr,bam1_cigar(current_read),current_read->core.n_cigar,&bresults.maxScore);//minor speed penalty compared to ungapped
		}

		if(user_args->EXTEND>0){

			if(rm.revcomp){
				// reverse strand: extend before the read start, unless that would fall before the buffer start
				abs_gen_end-=user_args->EXTEND;
				if(cptr-user_args->EXTEND>cptr_beg)cptr-=user_args->EXTEND;
				else goto NOEXTEND;
			}else{
				// forward strand: extend after the genomic end of the read
				cptr=cptr_beg;
				cptr+=rm.genomic_end;
			}
			int k=0;
			for(;k<user_args->EXTEND;++k){++*cptr;++cptr;}
		}
		NOEXTEND:

		cptr=cptr_beg;//jump back to the beginning of the chromosome using the helper

		if(!fr.written){//flush the first read of the chromosome stored in the struct fr
			cptr+=fr.pos;

			if(user_args->READTHROUGH){write_density_ungapped(cptr,fr.tlen,&bresults.maxScore);
			}else{write_density_gapped(cptr,fr.cigar,fr.n_cigar,&bresults.maxScore);}

			if(user_args->EXTEND>0){
				if(fr.revcomp){
					if(cptr-user_args->EXTEND>cptr_beg)cptr-=user_args->EXTEND;
					else goto FRNOEXTEND;
				}else{
					cptr=cptr_beg;
					cptr+=fr.genomic_end;
				}
				int k=0;
				for(;k<user_args->EXTEND;++k){++*cptr;++cptr;}
			}
			FRNOEXTEND:

			cptr=cptr_beg;//jump back to the beginning of the chromosome using the helper
			*databl_start++=fr.pos;++nindex;//genomic coordinate where the data block starts
			cs->min_pos=fr.pos;
			cs->min_scorespace=0;//will only be set based on the index
			cs->min_indexspace=nindex;
			cs->max_pos=abs_gen_end;
			lpos=fr.pos;
			last_end=fr.genomic_end;//load last read info of first read

			fr.written=1;//indicate that information has been used
		}

		/*################  INDEXING ######################*/

		cptr=cptr_beg;//jump back to the beginning of the chromosome using the helper
		if((current_read->core.pos-last_end)>=user_args->COMPRESSION){//check whether there was a large block without any data -> Triggers a jump on the sequence!
			nindex++;//add one index
			if(max_index<=nindex){//check whether we are already over the allocated index space
				printf("Index space found: %d > Index space allocated: %d !!",nindex,max_index);
				error("Error in indexing allocation detected!");
			}
			*databl_end++=last_end;//genomic coordinate where the data block ends | lags one index position behind start in the main loop!
			cs->min_scorespace+=*(databl_end-1)-*(databl_start-1)+1;//+1 because start=end is still one Bp!
			*databl_start++=current_read->core.pos;//genomic coordinate where the data block starts beginning with the current read
		}
		lpos=current_read->core.pos;
		last_end= user_args->READTHROUGH ? max(current_read->core.pos+rm.read_length,last_end) : max(abs_gen_end,last_end);
		SKIP_READ:
		// NOTE(review): `!first_call` tests the pointer, not the flag it points to; since the pointer is
		// dereferenced above it is non-NULL here and this check never fires - `!*first_call` was probably intended
		if(nindex>=INDEXLIMIT && !first_call)error("Index overflow!\n");
		bam_destroy1(current_read);
	}//end of bam index parsing skip tag section

	// not reachable: the loop above only exits through return statements
	bresults.file_status=-4;
	return bresults;//can never happen
}
Esempio n. 30
0
void mapper( char *ref, int length, int start_base_pos, const char *bam )
{  
    anal_t input;  
    gzFile pRef;
    kseq_t * seq = NULL;
    char chr[8] = { 0, };
    int ret;
    bam_plbuf_t *buf;
    bam1_t *b;

/*
    fprintf( stderr, "ref: %s\n", ref );
    fprintf( stderr, "length: %d\n", length );
    fprintf( stderr, "start_base_pos: %d\n", start_base_pos );
    fprintf( stderr, "bam: %s\n", bam );
*/
    input.beg = 0; input.end = 0x7fffffff;  
    input.in = samopen(bam, "rb", 0);
  
    if (input.in == 0) 
    {  
        fprintf(stderr, "Fail to open BAM file %s\n", bam);  
        return;  
    }  

    pRef = gzopen( ref, "r" );

        fprintf( stderr, "ref : %s\n", ref );
        fprintf( stderr, "pRef: %p\n", pRef );
    if( pRef == NULL )
    {
        fprintf( stderr, "ref : %s\n", ref );
        fprintf( stderr, "pRef: %p\n", pRef );

        return;
    }

    seq = kseq_init( pRef );

    b = bam_init1(); // alloc memory size of bam1_t
//fprintf( stderr, "%\pn", b );
    buf = bam_plbuf_init(pileup_func, &input); // alloc memory

    bam_plbuf_set_mask(buf, -1);
    
    while ((ret = samread( input.in, b)) >= 0)
    {   
        bam_plbuf_push(b, buf); 
        
//fprintf( stderr, "%x\n", b->core.flag );
        if( b->core.flag & 0x0004 ) // unmapped
        {    // do nothing
/*
            qname1 = strtok(bam1_qname(b), ":\t\n ");
            qname2 = strtok(NULL, ":\t\n ");
            qname3 = atoi(qname2);

            fprintf( stderr, "%s:%10d:%s:%d\t%c:%d:%d:%d\n", 
                qname1, qname3, "*", b->core.pos,
                '*', b->core.flag, b->core.qual, ret );
*/
            fprintf( stdout, "%s:%s:%d\t%c:0x%x:%d:%d\n", 
                bam1_qname(b), "*", b->core.pos+1,
                '*', b->core.flag, b->core.qual, ret );
/*
            fprintf( stderr, "%s:%s:%d\t%c:0x%x:%d:%d\n", 
                bam1_qname(b), "*", b->core.pos,
                '*', b->core.flag, b->core.qual, ret );
*/
        }
        else
        {
            // to find a base in the reference genome, seq.

            if( ( seq != NULL ) &&  
                ( strcmp( input.in->header->target_name[b->core.tid], chr ) == 0 ) )
            {
                // already found that 
                // fprintf( stderr, "found : %s\n", chr );
            }else
            {
                if( find_chr(input.in->header->target_name[b->core.tid], seq, chr) < 0 )
                {
                     fprintf( stderr, "ERROR : cannot find chromosome %s\n", \
                             input.in->header->target_name[b->core.tid] );
                }else
                {
                     fprintf( stderr, "FOUND CHR : %s\n", chr );
                }          
            }
            // remove not aligned to the chromosome

            fprintf( stdout, "%s:%s:%d\t%c:%d:%d:%d\n", 
                bam1_qname(b),
                input.in->header->target_name[b->core.tid], 
                b->core.pos+1,
                seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret );

/*
            fprintf( stderr, "%s:%s:%d\t%c:%d:%d:%d\n", 
                bam1_qname(b),
                input.in->header->target_name[b->core.tid], 
                b->core.pos,
                seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret );
*/
        }
    }

    // for the last bases...
  
//    printf("pos:%d(%c), flag:%d qual: %d(ret %d)\n", 
//           b->core.pos+1, seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret );

    bam_plbuf_push(0, buf); 

    bam_plbuf_destroy(buf); // release memory
    bam_destroy1(b);  // release memory size of bam1_t
     
    samclose(input.in);  
 
    kseq_destroy( seq );
    gzclose( pRef );

    return;  
}