Example #1
0
// Copy `in` to `out`, filling in mate information (mate tid/pos, TLEN and the
// mate-reverse / mate-unmapped flags) for consecutive records that share a
// query name. Records with tid < 0 and secondary alignments are read but never
// written.
// currently, this function ONLY works if each read has one hit
void bam_mating_core(bamFile in, bamFile out)
{
	bam_header_t *header;
	bam1_t *b[2]; // two-slot buffer: b[curr] receives the next record, b[1-curr] holds the pending one
	int curr, has_prev, pre_end = 0, cur_end;
	kstring_t str; // scratch buffer handed to bam_template_cigar()

	str.l = str.m = 0; str.s = 0;
	header = bam_header_read(in);
	bam_header_write(out, header);

	b[0] = bam_init1();
	b[1] = bam_init1();
	curr = 0; has_prev = 0;
	while (bam_read1(in, b[curr]) >= 0) {
		bam1_t *cur = b[curr], *pre = b[1-curr];
		// `continue` leaves curr unchanged, so a skipped record is overwritten by the next read
		if (cur->core.tid < 0) continue;
		cur_end = bam_calend(&cur->core, bam1_cigar(cur));
		// an alignment that runs past the end of its reference is flagged as unmapped
		if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
		if (cur->core.flag & BAM_FSECONDARY) continue; // skip secondary alignments
		if (has_prev) {
			if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name
				// each record points at the other one as its mate
				cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos;
				pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos;
				if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
					&& !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // set TLEN/ISIZE
				{
					// 5' coordinate: alignment end for a reverse-strand record, start otherwise
					uint32_t cur5, pre5;
					cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
					pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
					// the two mates receive the same distance with opposite signs
					cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
				} else cur->core.isize = pre->core.isize = 0;
				// mirror each record's strand into its mate's "mate reverse" flag
				if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE;
				else cur->core.flag &= ~BAM_FMREVERSE;
				if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE;
				else pre->core.flag &= ~BAM_FMREVERSE;
				// an unmapped record marks its mate "mate unmapped" and clears the mate's proper-pair flag
				if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; }
				if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; }
				bam_template_cigar(pre, cur, &str);
				bam_write1(out, pre);
				bam_write1(out, cur);
				has_prev = 0; // both mates written; nothing is pending
			} else { // unpaired or singleton
				// the pending record has no partner: clear its mate fields and write it;
				// `cur` becomes the new pending record (has_prev stays set)
				pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
				if (pre->core.flag & BAM_FPAIRED) {
					pre->core.flag |= BAM_FMUNMAP;
					pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR;
				}
				bam_write1(out, pre);
			}
		} else has_prev = 1; // first record of a potential pair: hold it until the next one arrives
		curr = 1 - curr; // flip slots so the record just read becomes `pre`
		pre_end = cur_end;
	}
	// flush the last pending record; unlike the singleton branch above, its mate fields are written as read
	if (has_prev) bam_write1(out, b[1-curr]);
	bam_header_destroy(header);
	bam_destroy1(b[0]);
	bam_destroy1(b[1]);
	free(str.s);
}
Example #2
0
/*
 * Convert a BAM stream aligned against a padded reference into unpadded
 * coordinates, copying the header and every record from `in` to `out`.
 *
 * A record at position 0 whose query name equals the name of its target is
 * treated as the padded reference itself: it is unpadded into `r`, given a
 * single all-match CIGAR, and used to build `posmap` (index in `r` -> number
 * of non-zero entries of `r.s` before it). Every other record has its CIGAR
 * rebuilt against the most recent such reference and its position translated
 * through `posmap`.
 *
 * NOTE(review): the non-reference branch assumes a reference record has
 * already been seen and that the record's position indexes into it -- confirm
 * that the input guarantees this.
 *
 * Returns 0 on success, -1 if the position map cannot be allocated.
 */
int bam_pad2unpad(bamFile in, bamFile out)
{
	bam_header_t *h;
	bam1_t *b;
	kstring_t r, q;
	uint32_t *cigar2 = 0;
	int n2 = 0, m2 = 0, *posmap = 0;
	int ret = 0;

	h = bam_header_read(in);
	bam_header_write(out, h);
	b = bam_init1();
	r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
	while (bam_read1(in, b) >= 0) {
		uint32_t *cigar = bam1_cigar(b);
		n2 = 0;
		if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam1_qname(b), h->target_name[b->core.tid]) == 0) {
			int i, k;
			unpad_seq(b, &r);
			write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH));
			replace_cigar(b, n2, cigar2);
			if (r.m > 0) {
				/* grow through a temporary so the old map is not lost on failure */
				int *grown = realloc(posmap, r.m * sizeof(int));
				if (grown == 0) { ret = -1; break; }
				posmap = grown;
			}
			for (i = k = 0; i < r.l; ++i) {
				posmap[i] = k; // note that a read should NOT start at a padding
				if (r.s[i]) ++k;
			}
		} else {
			int i, k, op;
			unpad_seq(b, &q);
			/* a leading soft clip is carried over unchanged */
			if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[0]);
			/* classify every column by whether the read and the reference hold a non-zero entry there */
			for (i = 0, k = b->core.pos; i < q.l; ++i, ++k)
				q.s[i] = q.s[i]? (r.s[k]? BAM_CMATCH : BAM_CINS) : (r.s[k]? BAM_CDEL : BAM_CPAD);
			/* run-length encode the per-column operations into CIGAR entries */
			for (i = k = 1, op = q.s[0]; i < q.l; ++i) {
				if (op != q.s[i]) {
					write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
					op = q.s[i]; k = 1;
				} else ++k;
			}
			write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
			/* a trailing soft clip is carried over unchanged */
			if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]);
			/* collapse match-pad-match triples into the last match entry and blank the other two */
			for (i = 2; i < n2; ++i)
				if (bam_cigar_op(cigar2[i]) == BAM_CMATCH && bam_cigar_op(cigar2[i-1]) == BAM_CPAD && bam_cigar_op(cigar2[i-2]) == BAM_CMATCH)
					cigar2[i] += cigar2[i-2], cigar2[i-2] = cigar2[i-1] = 0;
			/* compact away the blanked (zero) entries */
			for (i = k = 0; i < n2; ++i)
				if (cigar2[i]) cigar2[k++] = cigar2[i];
			n2 = k;
			replace_cigar(b, n2, cigar2);
			b->core.pos = posmap[b->core.pos];
		}
		bam_write1(out, b);
	}
	/* cigar2 is only ever populated through write_cigar(); release it with the other scratch buffers */
	free(r.s); free(q.s); free(posmap); free(cigar2);
	bam_destroy1(b);
	bam_header_destroy(h);
	return ret;
}
/*
 * Write one alignment through `fp`.
 * Returns -1 when `fp` is null or was opened for reading. For BAM output the
 * result of bam_write1() is returned; for text output the record is formatted,
 * written followed by a newline, and the formatted length plus one is returned.
 */
int samwrite(samfile_t *fp, const bam1_t *b)
{
	char *text;
	int len;

	/* not open for writing */
	if (fp == 0) return -1;
	if (fp->type & TYPE_READ) return -1;

	/* binary output goes straight to the BAM writer */
	if (fp->type & TYPE_BAM)
		return bam_write1(fp->x.bam, b);

	/* text output: bits 2-3 of the type select the formatting mode passed to bam_format1_core() */
	text = bam_format1_core(fp->header, b, fp->type>>2&3);
	len = strlen(text);
	fputs(text, fp->x.tamw);
	fputc('\n', fp->x.tamw);
	free(text);
	return len + 1;
}
Example #4
0
/*
 * Copy the BAM file argv[1] to argv[2], dropping the alignments whose
 * zero-based ordinal appears on stdin.
 *
 * Ids are read from stdin one at a time, so they must be supplied in ascending
 * order. Once stdin is exhausted (or holds something that is not a number) the
 * remaining alignments are copied unchanged.
 *
 * Returns 0 on success and -1 on any failure.
 */
int main(int argc, char** argv)
{
    int rc = -1;
    bamFile in = NULL;
    bamFile out = NULL;
    bam_header_t* header = NULL;
    bam1_t* b = NULL;
    long nextPrunedId = -1;
    long id = 0;

    if(argc < 3) {
        printf("No input nor output files provided");
        return -1;
    }

    in = bam_open(argv[1], "r");
    if (in == NULL) {
        printf("opening input file failed");
        goto cleanup;
    }

    b = bam_init1();

    out = bam_open(argv[2], "w");
    if (out == NULL) {
        printf("opening output file failed");
        goto cleanup;
    }

    header = bam_header_read(in);
    if (header == NULL) {
        printf("reading header failed");
        goto cleanup;
    }
    if(bam_header_write(out, header) < 0) {
        printf("writing header failed");
        goto cleanup;
    }

    /* scanf returns EOF (-1) on empty input, so compare against the number of
       expected conversions instead of testing for zero */
    if(scanf("%ld", &nextPrunedId) != 1) {
        printf("warning: no ids provided");
        goto cleanup;
    }

    while (bam_read1(in, b) >= 0) {
        if (nextPrunedId != id++) {
            // write BAM back
            if (bam_write1(out, b) < 0) {
                printf("writing alignment failed");
                goto cleanup;
            }
        } else if (scanf("%ld", &nextPrunedId) != 1) {
            /* no further ids: -1 never equals an ordinal, so the rest is copied */
            nextPrunedId = -1;
        }
    }
    rc = 0;

cleanup:
    // closing all resources
    if (header != NULL) bam_header_destroy(header);
    if (in != NULL) bam_close(in);
    if (out != NULL) bam_close(out);
    if (b != NULL) bam_destroy1(b);
    return rc;
}
Example #5
0
/*
 * Write one alignment through `fp`.
 * Returns -1 when `fp` is null or was opened for reading. For BAM output the
 * result of bam_write1() is returned; for text output the formatted record and
 * a newline are pushed through the writer callback and the formatted length
 * plus one is returned.
 */
int samwrite(samfile_t *fp, const bam1_t *b)
{
    char *text;
    int len;

    /* not open for writing */
    if (fp == 0) return -1;
    if (fp->type & TYPE_READ) return -1;

    /* binary output goes straight to the BAM writer */
    if (fp->type & TYPE_BAM)
        return bam_write1(fp->x.bam, b);

    /* text output: bits 2-3 of the type select the formatting mode passed to bam_format1_core() */
    text = bam_format1_core(fp->header, b, fp->type>>2&3);
    len = strlen(text);
    fp->x.tamw.writer(fp->x.tamw.writer_data, (uint8_t*) text, strlen(text));
    fp->x.tamw.writer(fp->x.tamw.writer_data, (uint8_t*) "\n", 1);
    free(text);
    return len + 1;
}
Example #6
0
/*
 * Convert the SAM file `sam_input` into the BAM file `bam_input`.
 * The header is copied first, then every alignment; `num_alignments` is
 * incremented once per alignment written. When `time_flag` is set, the whole
 * conversion is bracketed by start_timer()/stop_timer().
 */
void convert_sam_to_bam(char* sam_input, char* bam_input) {
    bam1_t* record = bam_init1();

    LOG_DEBUG("CONVERT-START: sam to bam\n");

    if (time_flag) {
        start_timer(t1_convert);
    }

    /* SAM source, BAM destination */
    tamFile sam_in = sam_open(sam_input);
    bam_file_t* bam_out = bam_fopen_mode(bam_input, NULL, "w");

    /* copy the header across */
    bam_header_t* hdr = sam_header_read(sam_in);
    bam_header_write(bam_out->bam_fd, hdr);

    /* copy alignments until sam_read1() stops returning a positive value */
    for (;;) {
        if (sam_read1(sam_in, hdr, record) <= 0) {
            break;
        }
        bam_write1(bam_out->bam_fd, record);
        num_alignments++;
    }

    /* release files, header and the alignment buffer */
    bam_fclose(bam_out);
    sam_close(sam_in);
    bam_header_destroy(hdr);
    bam_destroy1(record);

    if (time_flag) {
        stop_timer(t1_convert, t2_convert, convert_time);
    }

    /* a single batch by convention, for the statistics (not a real batch) */
    number_of_batchs = 1;
}
Example #7
0
/*
 * Fetch callback: give the alignment uniform BI and BD tags and write it out.
 * Each tag is a NUL-terminated string of l_qseq bytes, filled with the `iq`
 * and `dq` values of the data_t_uniform passed in `data`. Any existing BI/BD
 * tag is removed first. Always returns 0.
 */
static int uniform_fetch_func(bam1_t *b, void *data)
{
     data_t_uniform *cfg = (data_t_uniform*)data;
     bam1_core_t *core = &b->core;
     uint8_t *existing;
     char *ins_quals;
     char *del_quals;

     /* BI tag */
     ins_quals = malloc((core->l_qseq + 1) * sizeof *ins_quals);
     memset(ins_quals, cfg->iq, core->l_qseq);
     ins_quals[core->l_qseq] = '\0';

     if ((existing = bam_aux_get(b, BI_TAG)) != 0) {
          bam_aux_del(b, existing);
     }
     bam_aux_append(b, BI_TAG, 'Z', core->l_qseq + 1, (uint8_t*) ins_quals);

     /* BD tag */
     del_quals = malloc((core->l_qseq + 1) * sizeof *del_quals);
     memset(del_quals, cfg->dq, core->l_qseq);
     del_quals[core->l_qseq] = '\0';

     if ((existing = bam_aux_get(b, BD_TAG)) != 0) {
          bam_aux_del(b, existing);
     }
     bam_aux_append(b, BD_TAG, 'Z', core->l_qseq + 1, (uint8_t*) del_quals);

     bam_write1(cfg->out, b);

     /* the buffers are released only after the record has been written */
     free(ins_quals);
     free(del_quals);

     return 0;
}
Example #8
0
/*
 * Fetch callback: attach per-base indel qualities as BI and BD tags and write
 * the alignment to tmp->out.
 *
 * For aligned (M/=/X) bases the quality is DINDELQ[hpcount[x+1]], where x is
 * the reference coordinate; DINDELQ[0] is used past the end of the reference,
 * when the count exceeds 18, and for inserted or soft-clipped bases. The
 * per-reference hpcount array is rebuilt whenever the alignment's target
 * differs from the cached one.
 *
 * Reads matching BAM_DEF_MASK, or without a reference id, are written
 * unchanged. Unrecoverable conditions (reference fetch failure, allocation
 * failure, unknown or inconsistent CIGAR) are reported with LOG_FATAL and
 * terminate the process. Returns 0 otherwise.
 */
static int dindel_fetch_func(bam1_t *b, void *data)
{
     data_t_dindel *tmp = (data_t_dindel*)data;
     bam1_core_t *c = &b->core;
     uint8_t *to_delete;
     uint8_t *indelq;
     uint32_t *cigar;
     int i;
     int x; /* coordinate on reference */
     int y; /* coordinate on query */

     /* don't change reads failing default mask: BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP.
      * A read without a reference id is passed through as well: there is no
      * target name to look its reference sequence up by. */
     if ((c->flag & BAM_DEF_MASK) || c->tid < 0) {
          bam_write1(tmp->out, b);
          return 0;
     }

     /* get the reference sequence and compute homopolymer array */
     if (tmp->tid != c->tid) {
          int fetched_len = 0;
          int rlen;
          char *ref = fai_fetch(tmp->fai, tmp->in->header->target_name[c->tid], &fetched_len);
          if (ref == NULL) {
               LOG_FATAL("couldn't fetch reference sequence for read %s\n", bam1_qname(b));
               exit(1);
          }
          strtoupper(ref);/* safeguard */
          rlen = strlen(ref);
          tmp->tid = c->tid;
          free(tmp->hpcount);
          tmp->hpcount = (int*)malloc(rlen*sizeof(int));
          if (tmp->hpcount == NULL && rlen > 0) {
               LOG_FATAL("out of memory while processing read %s\n", bam1_qname(b));
               exit(1);
          }
          find_homopolymers(ref, tmp->hpcount, rlen);
          free(ref);
          tmp->rlen = rlen;
     }

     /* zero-filled so that bytes beyond the last quality written are defined */
     indelq = (uint8_t*)calloc((size_t)c->l_qseq + 1, sizeof(uint8_t));
     if (indelq == NULL) {
          LOG_FATAL("out of memory while processing read %s\n", bam1_qname(b));
          exit(1);
     }

     /* parse the cigar string */
     cigar = bam1_cigar(b);
     x = c->pos;
     y = 0;
     for (i = 0; i < c->n_cigar; ++i) {
          int j, oplen = cigar[i]>>4, op = cigar[i]&0xf;
          if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
               /* never write past the l_qseq qualities the buffer holds */
               if (oplen > c->l_qseq - y) {
                    LOG_FATAL("cigar of read %s is longer than its sequence\n", bam1_qname(b));
                    exit(1);
               }
               for (j = 0; j < oplen; j++) {
                    indelq[y] = (x > tmp->rlen-2) ? DINDELQ[0] : (tmp->hpcount[x+1]>18 ?
                         DINDELQ[0] : DINDELQ[tmp->hpcount[x+1]]);
                    x++;
                    y++;
               }
          } else if (op == BAM_CHARD_CLIP) { /* do nothing */
          } else if (op == BAM_CDEL) {
               x += oplen;
          } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
               if (oplen > c->l_qseq - y) {
                    LOG_FATAL("cigar of read %s is longer than its sequence\n", bam1_qname(b));
                    exit(1);
               }
               for (j = 0; j < oplen; j++) {
                    indelq[y] = DINDELQ[0];
                    y++;
               }
          } else {
               LOG_FATAL("unknown op %d for read %s\n", op, bam1_qname(b));/* FIXME skip? seen this somewhere else properly handled */
               exit(1);
          }
     }
     indelq[y] = '\0';

     /* the same quality string is stored under both tags */
     to_delete = bam_aux_get(b, BI_TAG);
     if (to_delete) {
          bam_aux_del(b, to_delete);
     }
     bam_aux_append(b, BI_TAG, 'Z', c->l_qseq+1, indelq);

     to_delete = bam_aux_get(b, BD_TAG);
     if (to_delete) {
          bam_aux_del(b, to_delete);
     }
     bam_aux_append(b, BD_TAG, 'Z', c->l_qseq+1, indelq);

     bam_write1(tmp->out, b);
     free(indelq);
     return 0;
}
Example #9
0
/*
 * Copy the alignments of `inBamFile` that pass every filter to `outBamFile`.
 *
 * An alignment is rejected when:
 *   - it is supplementary / secondary and the matching ignore flag is set,
 *   - its mapping quality is below `minMapQual`,
 *   - its NM tag exceeds `maxMisMatches` (a missing NM tag counts as 0),
 *   - its CIGAR query length is below `minLen`,
 *   - (matches - mismatches) / matches is below `minPcId`,
 *   - matches / query length is below `minPcAln`.
 * Both ratios are taken as 0 when their denominator is 0.
 *
 * Errors are reported on stderr; the function returns nothing.
 */
void filterReads(char * inBamFile,
                 char * outBamFile,
                 int minMapQual,
                 int minLen,
                 int maxMisMatches,
                 float minPcId,
                 float minPcAln,
                 int ignoreSuppAlignments,
                 int ignoreSecondaryAlignments) {
    int result = -1;
    int outResult = -1;

    /* flag bits that cause an alignment to be skipped */
    int supp_check = 0x0;
    if (ignoreSuppAlignments) {
        supp_check |= BAM_FSUPPLEMENTARY;
    }
    if (ignoreSecondaryAlignments) {
        supp_check |= BAM_FSECONDARY;
    }

    // helper variables
    BGZF* in = 0;
    BGZF* out = 0;
    bam1_t *b = bam_init1();
    bam_hdr_t *h = 0;

    if (b == 0) {
        fprintf(stderr, "ERROR: Failed to allocate an alignment record.\n");
        return;
    }

    // open bam
    if ((in = bgzf_open(inBamFile, "r")) == 0) {
        fprintf(stderr,
               "ERROR: Failed to open \"%s\" for reading.\n",
               inBamFile);
    }
    else if ((h = bam_hdr_read(in)) == 0) { // read header
        fprintf(stderr,
                "ERROR: Failed to read BAM header of file \"%s\".\n",
                inBamFile);
    }
    else if ((out = bgzf_open(outBamFile, "w")) == 0) {
        fprintf(stderr,
               "ERROR: Failed to open \"%s\" for writing.\n",
               outBamFile);
    }
    else if (bam_hdr_write(out, h) < 0) {
        fprintf(stderr,
                "ERROR: Failed to write BAM header to file \"%s\".\n",
                outBamFile);
    }
    else {
        int line = 0;
        int matches, mismatches, qLen;
        float pcAln, pcId;
        int showStats = 0;

        // fetch alignments
        while ((result = bam_read1(in, b)) >= 0) {
            uint8_t *nmTag;
            line += 1;

            // only primary mappings
            if ((b->core.flag & supp_check) != 0) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, non-primary\n", line);
                continue;
            }

            // only high quality
            if (b->core.qual < minMapQual) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, quality: %d\n", line, b->core.qual);
                continue;
            }

            // not too many absolute mismatches; the tag lookup yields NULL
            // when NM is absent, which must not be passed on to bam_aux2i
            nmTag = bam_aux_get(b, "NM");
            mismatches = nmTag ? bam_aux2i(nmTag) : 0;
            if (mismatches > maxMisMatches) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, mismatches: %d\n", line, mismatches);
                continue;
            }

            // not too short
            qLen = bam_cigar2qlen((&b->core)->n_cigar, bam_get_cigar(b));
            if (qLen < minLen) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, length: %d\n", line, qLen);
                continue;
            }

            // only high percent identity (fraction between 0 and 1)
            matches = bam_cigar2matches((&b->core)->n_cigar, bam_get_cigar(b));
            pcId = (matches > 0) ? (matches - mismatches) / (float)matches : 0.0f;
            if (pcId < minPcId) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, identity pc: %.4f\n", line, pcId);
                continue;
            }

            // only high percent alignment (fraction between 0 and 1)
            pcAln = (qLen > 0) ? matches / (float)qLen : 0.0f;
            if (pcAln < minPcAln) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, alignment pc: %.4f\n", line, pcAln);
                continue;
            }

            // any negative result is a write failure; stop instead of
            // producing a truncated stream with one message per record
            if ((outResult = bam_write1(out, b)) < 0) {
                fprintf(stderr,
                        "ERROR: Attempt to write read no. %d to file \"%s\" failed with code %d.\n",
                        line, outBamFile, outResult);
                break;
            }
        }
        // -1 is the regular end-of-file result; anything lower is an error
        if (result < -1) {
            fprintf(stderr,
                    "ERROR: retrieval of read no. %d from file \"%s\" failed with code %d.\n",
                    line, inBamFile, result);
        }
    }
    if (h) bam_hdr_destroy(h);
    if (in) bgzf_close(in);
    if (out) bgzf_close(out);
    bam_destroy1(b);
}
Example #10
0
int main(int argc, char *argv[])  
{  
  short out2stdout=0;
  hashtable ht=new_hashtable(HASHSIZE);
  bamFile in,in2; 
  bamFile out; 
  int paired;//1 if not paired or pair read 1, 2 otherwise
  index_mem=sizeof(hashtable)*sizeof(hashnode**)*HASHSIZE*2;

  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam or - for stdout>\n");  
    return 1;  
  }  
  // Open file and exit if error
  in = bam_open(argv[1], "rb");
  out2stdout = strcmp(argv[2], "-")? 0 : 1; 
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  if (!out2stdout) {
    fprintf(stderr,"bam_fix_NH version %s\n",VERSION);
    fprintf(stderr,"Processing %s\n",argv[1]);
    fprintf(stderr,"Hashing...\n");fflush(stderr);
  }

  while(bam_read1(in,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FREAD2) paired=2;
    else paired=1;
    ++num_alns;
    new_read_aln(ht,fix_read_name(bam1_qname(aln),paired));
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
  }
  bam_close(in);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Hashing complete (%lu alignments)\n",num_alns);
    fprintf(stderr,"Memory used: %ld MB\n",index_mem/1024/1024);  
    fprintf(stderr,"Updating entries with NH and printing BAM...\n");
    fflush(stderr);
  }
  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  num_alns=0;
  while(bam_read1(in2,aln)>=0) { // read alignment
    paired=1;
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FREAD2) paired=2;
    ++num_alns;
    READ_ALN *r=get_read_aln(ht,fix_read_name(bam1_qname(aln),paired));

    assert(r!=NULL);
    // update the NH field
    uint8_t *old_nh = bam_aux_get(aln, "NH");    
    int32_t nh=r->ctr;
    if (old_nh) {
      if (nh!=bam_aux2i(old_nh)) {
	fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh);
      }
      bam_aux_del(aln, old_nh);
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      //      printf("!>%s %d\n",bam1_qname(aln),r->ctr);
#endif
    }
    if (!old_nh) { // add NH  
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      fprintf(stderr,"!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh));
#endif
    }
    bam_write1(out,aln);
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Done.\n");
  }
  return 0;  
}  
Example #11
0
int main(int argc, char *argv[])  
{  
  hashtable ht=new_hashtable(HASHSIZE);
  bamFile in,in2; 
  bamFile out; 
  
  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam>\n");  
    return 1;  
  }  
  
  // Open file and exit if error
  //in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb");
  in = bam_open(argv[1], "rb");
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  printf("Hashing...\n");flush(stdout);
  while(bam_read1(in,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    ++num_alns;
    new_read_aln(ht,bam1_qname(aln));
  }
  bam_close(in);  
  printf("Hashing complete (%lu alignments)\n",num_alns);
  printf("Memory used in the hash: %ld MB\n",index_mem/1024/1024);  
  flush(stdout);
  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  
  while(bam_read1(in2,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    ++num_alns;
    
    READ_ALN *r=get_read_aln(ht,bam1_qname(aln));

    //assert(r!=NULL);
    // update the NH field
    uint8_t *old_nh = bam_aux_get(aln, "NH");    
    uint8_t nh=r->ctr;
    if (old_nh) {
      if (nh!=bam_aux2i(old_nh)) {
	fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh);
      }
      bam_aux_del(aln, old_nh);
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
    }
    if (!old_nh) { // add NH  
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      printf("!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh));
#endif
    }
    // in->header
    // Also fix the XS:A tag
    // BAM_FREAD1
    // BAM_FREAD2
    // BAM_FREVERSE the read is mapped to the reverse strand 
    //bam1_cigar(b) 
      //BAM_CREF_SKIP 3 CIGAR skip on the reference (e.g. spliced alignment)
      //BAM_FREVERSE 16 the read is mapped to the reverse strand
    if (aln->core.flag & BAM_FSECONDARY) continue; // skip secondary alignments
    if (aln->core.flag & ! BAM_FPAIRED) continue; // not paired
    if (aln->core.flag & ! BAM_FPROPER_PAIR) continue; // not a proper pair
    if (aln->core.flag & ! BAM_FMUNMAP) continue; // the mate is mapped
    if (aln->core.flag & BAM_FSECONDARY) continue; // secundary read
    if (aln->core.flag & BAM_FREAD2) continue; // only count each pair once
    // core.strand == 0 (f/+) 1 r/-
    // flag
    // bam1_qname(b)
    bam_write1(out,aln);
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  return 0;  
/*
uint8_t *old_nm = bam_aux_get(b, "NM");
90 	if (c->flag & BAM_FUNMAP) return;
91 	if (old_nm) old_nm_i = bam_aux2i(old_nm);
92 	if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
93 	else if (nm != old_nm_i) {
94 	fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm);
95 	bam_aux_del(b, old_nm);
96 	bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
97 	}
*/
}  
Example #12
0
/*
 * bam_fix_se_flag <in.bam> <out.bam or - for stdout>
 *
 * Copy every mapped alignment from the input to the output. For alignments
 * that are not flagged as paired, the proper-pair, mate-unmapped, read1 and
 * read2 flags are cleared and a '.' is printed on stderr. Unaligned reads are
 * not written.
 *
 * Returns 0 on success, 1 on failure.
 */
int main(int argc, char *argv[])
{
  short out2stdout=0;
  bamFile in;
  bamFile out;
  unsigned long num_alns=0;
  bam_header_t *header;
  bam1_t *aln;

  if (argc != 3) {
    fprintf(stderr, "Usage: bam_fix_se_flag <in.bam> <out.bam or - for stdout>\n");
    return 1;
  }

  // Validate the input before the output is created or truncated
  in = bam_open(argv[1], "rb");
  if (in == 0 ) {
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);
    return 1;
  }
  out2stdout = strcmp(argv[2], "-")? 0 : 1;
  out = out2stdout? bam_dopen(fileno(stdout), "w") : bam_open(argv[2], "w");
  if (out == 0) {
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);
    bam_close(in);
    return 1;
  }

  // ***********
  // Copy header; reading it leaves `in` positioned at the first alignment,
  // so the same handle is used for the alignments below
  header = bam_header_read(in);
  bam_header_write(out,header);
  if (header) bam_header_destroy(header);

  aln=bam_init1();

  if (!out2stdout) {
    fprintf(stderr,"bam_fix_se_flag version %s\n",VERSION);
    fprintf(stderr,"Processing %s\n",argv[1]);
  }

  while(bam_read1(in,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (!(aln->core.flag & BAM_FPAIRED)) { //SE
      //turn off the other pair related flags
      aln->core.flag&=~BAM_FPROPER_PAIR;
      aln->core.flag&=~BAM_FMUNMAP;
      aln->core.flag&=~BAM_FREAD1;
      aln->core.flag&=~BAM_FREAD2;
      fprintf(stderr, ".");
    }
    bam_write1(out,aln);
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
    ++num_alns;
  }

  bam_destroy1(aln);
  bam_close(in);
  bam_close(out);
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Done.\n");
  }
  return 0;
}
Example #13
0
// FIX MRNM and unaligned reads
/*
 * bam_tophat2_pe_fix <in.bam> <out.bam>   ("-" selects stdin / stdout)
 *
 * Copy every alignment, repairing two inconsistencies on the way:
 *   - a read without a reference id gets mapping quality 0;
 *   - a paired read whose mate reference id or mate position is negative while
 *     the mate-unmapped flag is clear gets that flag set and the other mate
 *     field reset to -1.
 * The number of fixes of each kind is reported on stderr.
 * Returns 0 on success, 1 on failure.
 */
int main(int argc, char *argv[])
{
  bamFile in;
  bamFile out;
  unsigned long num_alns=0;
  // counts
  unsigned long unalign_mapq_fix=0;
  unsigned long mtid_fix=0;
  unsigned long mpos_fix=0;
  bam_header_t *header;
  bam1_t *aln;

  if (argc != 3) {
    fprintf(stderr, "Usage: bam_tophat2_pe_fix <in.bam> <out.bam>\n");
    return 1;
  }

  in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb");
  if (in == 0) {
    fprintf(stderr, "ERROR: Fail to open input BAM file %s\n", argv[1]);
    return 1;
  }

  header = bam_header_read(in);
  aln=bam_init1();

  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w");
  if (out == 0) {
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);
    if (header) bam_header_destroy(header);
    bam_destroy1(aln);
    bam_close(in);
    return 1;
  }
  bam_header_write(out,header);

  while(bam_read1(in,aln)>=0) {
    ++num_alns;
    if (aln->core.tid < 0) {
      // unaligned reads must carry mapping quality 0
      if ( aln->core.qual!=0 ) {
        aln->core.qual=0;
        unalign_mapq_fix++;
      }
    }
    if ( aln->core.flag & BAM_FPAIRED ) {
      // paired: a missing mate reference or position means the mate is unmapped.
      // Setting the flag in the first fix makes the second test false, so each
      // read is counted at most once.
      if ( aln->core.mtid <0 && !(aln->core.flag & BAM_FMUNMAP) ) {
        aln->core.flag |= BAM_FMUNMAP;
        aln->core.mpos=-1;
        mtid_fix++;
      }
      if ( aln->core.mpos <0 && !(aln->core.flag & BAM_FMUNMAP) ) {
        aln->core.flag |= BAM_FMUNMAP;
        aln->core.mtid=-1;
        mpos_fix++;
      }
    }

    bam_write1(out,aln);
  }
  if (header) bam_header_destroy(header);
  bam_destroy1(aln);
  bam_close(in);
  bam_close(out);

  fprintf(stderr,"unaligned MAPQ fixes: %lu\n",unalign_mapq_fix);
  fprintf(stderr,"unaligned mtid fixes: %lu\n",mtid_fix);
  fprintf(stderr,"unaligned mpos fixes: %lu\n",mpos_fix);
  return 0;
}
Example #14
0
// load a pair from a bam file
SR_Status SR_BamInStreamLoadPair(SR_BamNode** ppUpAlgn, 
                                 SR_BamNode** ppDownAlgn, 
                                 SR_BamInStream* pBamInStream, 
				 bamFile* bam_writer_complete_bam) 
{
    khash_t(queryName)* pNameHashPrev = pBamInStream->pNameHashes[PREV_BIN];
    khash_t(queryName)* pNameHashCurr = pBamInStream->pNameHashes[CURR_BIN];

    int ret = 1;
    while(ret > 0 && (ret = SR_BamInStreamLoadNext(pBamInStream)) > 0)
    {
	// exclude those reads who are non-paired-end, qc-fail, duplicate-marked, proper-paired?!, 
        // both aligned, secondary-alignment and no-name-specified.
        SR_Bool shouldBeFiltered = pBamInStream->filterFunc(pBamInStream->pNewNode, pBamInStream->filterData);
        if (shouldBeFiltered)
        {
	    #ifdef VERBOSE_DEBUG
	      fprintf(stderr,"%s: filtered.\n", bam1_qname(&(pBamInStream->pNewNode->alignment)));
	    #endif

	    if (bam_writer_complete_bam != NULL) bam_write1(*bam_writer_complete_bam, &(pBamInStream->pNewNode->alignment));
	    
	    SR_BamNodeFree(pBamInStream->pNewNode, pBamInStream->pMemPool);
            pBamInStream->pNewNode = NULL;
            continue;
        } else {
	    #ifdef VERBOSE_DEBUG
	      fprintf(stderr,"%s: kept in buffer.\n", bam1_qname(&(pBamInStream->pNewNode->alignment)));
	    #endif
	}

        // update the current ref ID or position if the incoming alignment has a 
        // different value. The name hash and the bam array will be reset
        if (pNameHashPrev != NULL 
            && (pBamInStream->pNewNode->alignment.core.tid != pBamInStream->currRefID
                || pBamInStream->pNewNode->alignment.core.pos >= pBamInStream->currBinPos + 2 * pBamInStream->binLen))
        {
            if (pBamInStream->pNewNode->alignment.core.tid != pBamInStream->currRefID)
            {
                ret = SR_OUT_OF_RANGE; // different chromosome id
            }

            pBamInStream->currRefID  = pBamInStream->pNewNode->alignment.core.tid;
            pBamInStream->currBinPos = pBamInStream->pNewNode->alignment.core.pos;

            // Clear the hash buffer
	    kh_clear(queryName, pNameHashPrev);
            kh_clear(queryName, pNameHashCurr);

            // Store alignments before releasing them
            if (bam_writer_complete_bam != NULL) {
	      SR_BamNode* cur = pBamInStream->pAlgnLists[PREV_BIN].first;
	      for (int i = 0; i < pBamInStream->pAlgnLists[PREV_BIN].numNode; ++i) {
	        // if the cur is not NULL, store the cur in the complete bam
		if (cur != NULL) bam_write1(*bam_writer_complete_bam, &(cur->alignment));
		cur = cur->next;
	      } // end for

	      cur = pBamInStream->pAlgnLists[CURR_BIN].first;
	      for (int i = 0; i < pBamInStream->pAlgnLists[CURR_BIN].numNode; ++i) {
	        // if the cur is not NULL, store the cur in the complete bam
		if (cur != NULL) bam_write1(*bam_writer_complete_bam, &(cur->alignment));
		cur = cur->next;
	      } // end for
	    } // end if
	      
	    SR_BamListReset(&(pBamInStream->pAlgnLists[PREV_BIN]), pBamInStream->pMemPool);
            SR_BamListReset(&(pBamInStream->pAlgnLists[CURR_BIN]), pBamInStream->pMemPool);

        }
        else if (pBamInStream->pNewNode->alignment.core.pos >= pBamInStream->currBinPos + pBamInStream->binLen)
        {
            pBamInStream->currBinPos += pBamInStream->binLen;

            kh_clear(queryName, pNameHashPrev);
            SR_SWAP(pNameHashPrev, pNameHashCurr, khash_t(queryName)*);

            // Store alignments before releasing them
	    if (bam_writer_complete_bam != NULL) {
	      SR_BamNode* cur = pBamInStream->pAlgnLists[PREV_BIN].first;
	      for (int i = 0; i < pBamInStream->pAlgnLists[PREV_BIN].numNode; ++i) {
	        // if the cur is not NULL, store the cur in the complete bam
		if (cur != NULL) bam_write1(*bam_writer_complete_bam, &(cur->alignment));
		cur = cur->next;
              }
	    } // end if

	    SR_BamListReset(&(pBamInStream->pAlgnLists[PREV_BIN]), pBamInStream->pMemPool);

            SR_SWAP(pBamInStream->pAlgnLists[PREV_BIN], pBamInStream->pAlgnLists[CURR_BIN], SR_BamList);
        }
	else
	{
	} // end if-elseif-else