Esempio n. 1
0
// currently, this function ONLY works if each read has one hit
void bam_mating_core(bamFile in, bamFile out)
{
	bam_header_t *header;
	bam1_t *b[2];
	int curr, has_prev, pre_end = 0, cur_end;
	kstring_t str;

	str.l = str.m = 0; str.s = 0;
	header = bam_header_read(in);
	bam_header_write(out, header);

	b[0] = bam_init1();
	b[1] = bam_init1();
	curr = 0; has_prev = 0;
	while (bam_read1(in, b[curr]) >= 0) {
		bam1_t *cur = b[curr], *pre = b[1-curr];
		if (cur->core.tid < 0) continue;
		cur_end = bam_calend(&cur->core, bam1_cigar(cur));
		if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
		if (cur->core.flag & BAM_FSECONDARY) continue; // skip secondary alignments
		if (has_prev) {
			if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name
				cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos;
				pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos;
				if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
					&& !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // set TLEN/ISIZE
				{
					uint32_t cur5, pre5;
					cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
					pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
					cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
				} else cur->core.isize = pre->core.isize = 0;
				if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE;
				else cur->core.flag &= ~BAM_FMREVERSE;
				if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE;
				else pre->core.flag &= ~BAM_FMREVERSE;
				if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; }
				if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; }
				bam_template_cigar(pre, cur, &str);
				bam_write1(out, pre);
				bam_write1(out, cur);
				has_prev = 0;
			} else { // unpaired or singleton
				pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
				if (pre->core.flag & BAM_FPAIRED) {
					pre->core.flag |= BAM_FMUNMAP;
					pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR;
				}
				bam_write1(out, pre);
			}
		} else has_prev = 1;
		curr = 1 - curr;
		pre_end = cur_end;
	}
	if (has_prev) bam_write1(out, b[1-curr]);
	bam_header_destroy(header);
	bam_destroy1(b[0]);
	bam_destroy1(b[1]);
	free(str.s);
}
Esempio n. 2
0
static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam_header_t *h, int is_stdout)
{
	char *name, mode[3];
	int i;
	bamFile fp;
	ks_mergesort(sort, k, buf, 0);
	name = (char*)calloc(strlen(prefix) + 20, 1);
	if (n >= 0) {
		sprintf(name, "%s.%.4d.bam", prefix, n);
		strcpy(mode, "w1");
	} else {
		sprintf(name, "%s.bam", prefix);
		strcpy(mode, "w");
	}
	fp = is_stdout? bam_dopen(fileno(stdout), mode) : bam_open(name, mode);
	if (fp == 0) {
		fprintf(stderr, "[sort_blocks] fail to create file %s.\n", name);
		free(name);
		// FIXME: possible memory leak
		return;
	}
	free(name);
	bam_header_write(fp, h);
	for (i = 0; i < k; ++i)
		bam_write1_core(fp, &buf[i]->core, buf[i]->data_len, buf[i]->data);
	bam_close(fp);
}
Esempio n. 3
0
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
	BGZF *fp;
	bam_header_t *old;
	int len;
	uint8_t *buf;
	if (in->open_mode != 'r') return -1;
	buf = malloc(BUF_SIZE);
	old = bam_header_read(in);
	fp = bgzf_dopen(fd, "w");
	bam_header_write(fp, h);
	if (in->block_offset < in->block_length) {
		bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
		bgzf_flush(fp);
	}
#ifdef _USE_KNETFILE
	while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0)
		fwrite(buf, 1, len, fp->fp);
#else
	while (!feof(in->fp) && (len = fread(buf, 1, BUF_SIZE, in->fp)) > 0)
		fwrite(buf, 1, len, fp->fp);
#endif
	free(buf);
	fp->block_offset = in->block_offset = 0;
	bgzf_close(fp);
	return 0;
}
Esempio n. 4
0
samfile_t* samopen_out(quip_writer_t writer, void* writer_data, bool binary, void* aux)
{
    samfile_t *fp;
    fp = (samfile_t*)calloc(1, sizeof(samfile_t));

    fp->header = bam_header_dup((const bam_header_t*)aux);

    if (binary) { // binary
        fp->type |= TYPE_BAM;
        fp->x.bam = bam_open_out(writer, writer_data);
        if (fp->x.bam == 0) goto open_err_ret;
        bam_header_write(fp->x.bam, fp->header);

    } else { // text
        // open file

        fp->x.tamw.writer      = writer;
        fp->x.tamw.writer_data = writer_data;
        fp->type |= BAM_OFDEC<<2;

        // write header
        if (aux) {
            int i;
            bam_header_t *alt;
            // parse the header text
            alt = bam_header_init();
            alt->l_text = fp->header->l_text;
            alt->text = fp->header->text;
            sam_header_parse(alt);
            alt->l_text = 0;
            alt->text = 0;
            // check if there are @SQ lines in the header
            writer(writer_data, (uint8_t*) fp->header->text, fp->header->l_text);

            if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
                if (alt->n_targets != fp->header->n_targets && bam_verbose >= 1)
                    fprintf(stderr, "[samopen] inconsistent number of target sequences. Output the text header.\n");
            } else { // then dump ->target_{name,len}
                char target_len_str[11];
                for (i = 0; i < fp->header->n_targets; ++i) {
                    writer(writer_data, (uint8_t*) "@SQ\tSN:", 7);
                    writer(writer_data, (uint8_t*) fp->header->target_name[i], strlen(fp->header->target_name[i]));
                    writer(writer_data, (uint8_t*) "\tLN:", 4);
                    snprintf(target_len_str, sizeof(target_len_str), "%"PRIu32, fp->header->target_len[i]);
                    writer(writer_data, (uint8_t*) target_len_str, strlen(target_len_str));
                    writer(writer_data, (uint8_t*) "\n", 1);
                }
            }
            bam_header_destroy(alt);
        }
    }

    return fp;

open_err_ret:
    free(fp);
    return 0;
}
Esempio n. 5
0
int bam_pad2unpad(bamFile in, bamFile out)
{
	bam_header_t *h;
	bam1_t *b;
	kstring_t r, q;
	uint32_t *cigar2 = 0;
	int n2 = 0, m2 = 0, *posmap = 0;

	h = bam_header_read(in);
	bam_header_write(out, h);
	b = bam_init1();
	r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
	while (bam_read1(in, b) >= 0) {
		uint32_t *cigar = bam1_cigar(b);
		n2 = 0;
		if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam1_qname(b), h->target_name[b->core.tid]) == 0) {
			int i, k;
			unpad_seq(b, &r);
			write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH));
			replace_cigar(b, n2, cigar2);
			posmap = realloc(posmap, r.m * sizeof(int));
			for (i = k = 0; i < r.l; ++i) {
				posmap[i] = k; // note that a read should NOT start at a padding
				if (r.s[i]) ++k;
			}
		} else {
			int i, k, op;
			unpad_seq(b, &q);
			if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[0]);
			for (i = 0, k = b->core.pos; i < q.l; ++i, ++k)
				q.s[i] = q.s[i]? (r.s[k]? BAM_CMATCH : BAM_CINS) : (r.s[k]? BAM_CDEL : BAM_CPAD);
			for (i = k = 1, op = q.s[0]; i < q.l; ++i) {
				if (op != q.s[i]) {
					write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
					op = q.s[i]; k = 1;
				} else ++k;
			}
			write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
			if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]);
			for (i = 2; i < n2; ++i)
				if (bam_cigar_op(cigar2[i]) == BAM_CMATCH && bam_cigar_op(cigar2[i-1]) == BAM_CPAD && bam_cigar_op(cigar2[i-2]) == BAM_CMATCH)
					cigar2[i] += cigar2[i-2], cigar2[i-2] = cigar2[i-1] = 0;
			for (i = k = 0; i < n2; ++i)
				if (cigar2[i]) cigar2[k++] = cigar2[i];
			n2 = k;
			replace_cigar(b, n2, cigar2);
			b->core.pos = posmap[b->core.pos];
		}
		bam_write1(out, b);
	}
	free(r.s); free(q.s); free(posmap);
	bam_destroy1(b);
	bam_header_destroy(h);
	return 0;
}
Esempio n. 6
0
static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_header_t *h, int n_threads)
{
	size_t i;
	bamFile fp;
	fp = strcmp(fn, "-")? bam_open(fn, mode) : bam_dopen(fileno(stdout), mode);
	if (fp == 0) return;
	bam_header_write(fp, h);
	if (n_threads > 1) bgzf_mt(fp, n_threads, 256);
	for (i = 0; i < l; ++i)
		bam_write1_core(fp, &buf[i]->core, buf[i]->data_len, buf[i]->data);
	bam_close(fp);
}
Esempio n. 7
0
int main(int argc, char** argv)
{
    if(argc < 3) {
        printf("No input nor output files provided");
        return -1;
    }

    bamFile in = bam_open(argv[1], "r");
    bam_header_t* header;
    if (in == NULL) {
        printf("opening input file failed");
        return -1;
    }

    bam1_t* b = bam_init1();

    bamFile out = bam_open(argv[2], "w");
    if (out == NULL) {
        printf("opening input file failed");
        return -1;
    }

    header = bam_header_read(in);
    if(bam_header_write(out, header) < 0) {
        printf("writing header failed");
    }

    long nextPrunedId;
    if(!scanf ("%lu", &nextPrunedId)) {
        printf("warning: no ids provided");
        return -1;
    }
    long id = 0;
    while (bam_read1(in, b) >= 0) {
        // write BAM back
        if (nextPrunedId != id++) {
            bam_write1(out, b);
        } else {
            // fprintf(stderr, "pruning: id: %lu, pos: %d, length: %d\n", nextPrunedId, b->core.pos, b->core.l_qseq);
            if(!scanf ("%lu", &nextPrunedId)) {
                break;
            }
        }
    }

    // closing all resources
    bam_header_destroy(header);
    bam_close(in);
    bam_close(out);
    bam_destroy1(b);
    return 0;
}
Esempio n. 8
0
int add_dindel(const char *bam_in, const char *bam_out, const char *ref)
{
	data_t_dindel tmp;
    int count = 0;
    bam1_t *b = NULL;

	if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) {
         LOG_FATAL("Failed to open BAM file %s\n", bam_in);
             return 1;
        }
    if ((tmp.fai = fai_load(ref)) == 0) {
         LOG_FATAL("Failed to open reference file %s\n", ref);
         return 1;
    }
    /*warn_old_fai(ref);*/

    if (!bam_out || bam_out[0] == '-') {
         tmp.out = bam_dopen(fileno(stdout), "w");
    } else {
         tmp.out = bam_open(bam_out, "w");
    }
    bam_header_write(tmp.out, tmp.in->header);
    
    b = bam_init1();
    tmp.tid = -1;
    tmp.hpcount = 0;
    tmp.rlen = 0;
    while (samread(tmp.in, b) >= 0) {
         count++;
         dindel_fetch_func(b, &tmp); 
    }
    bam_destroy1(b);
    
    if (tmp.hpcount) free(tmp.hpcount);
    samclose(tmp.in);
    bam_close(tmp.out);
    fai_destroy(tmp.fai);
	LOG_VERBOSE("Processed %d reads\n", count);
	return 0;
}
Esempio n. 9
0
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
    BGZF *fp;
    bam_header_t *old;
    ssize_t len;
    uint8_t *buf;
    if (in->is_write) return -1;
    buf = malloc(BUF_SIZE);
    old = bam_header_read(in);
    fp = bgzf_fdopen(fd, "w");
    bam_header_write(fp, h);
    if (in->block_offset < in->block_length) {
        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
        bgzf_flush(fp);
    }
    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
        bgzf_raw_write(fp, buf, len);
    free(buf);
    fp->block_offset = in->block_offset = 0;
    bgzf_close(fp);
    return 0;
}
Esempio n. 10
0
void convert_sam_to_bam(char* sam_input, char* bam_input) {
    bam1_t* bam_p = bam_init1();

    LOG_DEBUG("CONVERT-START: sam to bam\n");

    //open SAM file for read
    if (time_flag) {
        start_timer(t1_convert);
    }
    tamFile sam_fd = sam_open(sam_input);

    //open BAM file for write
    bam_file_t* bam_file_p =  bam_fopen_mode(bam_input, NULL, "w");

    //read header from SAM file
    bam_header_t* bam_header_p = sam_header_read(sam_fd);

    //write header to BAM file
    bam_header_write(bam_file_p->bam_fd, bam_header_p);

    //write alignments to BAM file
    while (sam_read1(sam_fd, bam_header_p, bam_p) > 0) {
        bam_write1(bam_file_p->bam_fd, bam_p);
        num_alignments++;
    }

    //close BAM and SAM files, free bam alignment and bam file object
    bam_fclose(bam_file_p);
    sam_close(sam_fd);
    bam_header_destroy(bam_header_p);
    bam_destroy1(bam_p);
    if (time_flag) {
        stop_timer(t1_convert, t2_convert, convert_time);
    }

    //number_of_batchs = 1, convention value for statistics (not real batch)
    number_of_batchs = 1;
}
Esempio n. 11
0
int add_uniform(const char *bam_in, const char *bam_out,
                const int ins_qual, const int del_qual)
{
	data_t_uniform tmp;
    uint8_t iq = ENCODE_Q(ins_qual+33);
    uint8_t dq = ENCODE_Q(del_qual+33);
    bam1_t *b = NULL;
    int count = 0;

	if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) {
         LOG_FATAL("Failed to open BAM file %s\n", bam_in);
         return 1;
    }

    tmp.iq = iq;
    tmp.dq = dq;

    if (!bam_out || bam_out[0] == '-') {
         tmp.out = bam_dopen(fileno(stdout), "w");
    } else {
         tmp.out = bam_open(bam_out, "w");
    }
    bam_header_write(tmp.out, tmp.in->header);
    
    b = bam_init1();
    while (samread(tmp.in, b) >= 0) {
         count++;
         uniform_fetch_func(b, &tmp); 
    }
    bam_destroy1(b);
    
    samclose(tmp.in);
    bam_close(tmp.out);
    LOG_VERBOSE("Processed %d reads\n", count);
    return 0;
}
Esempio n. 12
0
int main(int argc, char *argv[])  
{  
  short out2stdout=0;
  hashtable ht=new_hashtable(HASHSIZE);
  bamFile in,in2; 
  bamFile out; 
  int paired;//1 if not paired or pair read 1, 2 otherwise
  index_mem=sizeof(hashtable)*sizeof(hashnode**)*HASHSIZE*2;

  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam or - for stdout>\n");  
    return 1;  
  }  
  // Open file and exit if error
  in = bam_open(argv[1], "rb");
  out2stdout = strcmp(argv[2], "-")? 0 : 1; 
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  if (!out2stdout) {
    fprintf(stderr,"bam_fix_NH version %s\n",VERSION);
    fprintf(stderr,"Processing %s\n",argv[1]);
    fprintf(stderr,"Hashing...\n");fflush(stderr);
  }

  while(bam_read1(in,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FREAD2) paired=2;
    else paired=1;
    ++num_alns;
    new_read_aln(ht,fix_read_name(bam1_qname(aln),paired));
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
  }
  bam_close(in);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Hashing complete (%lu alignments)\n",num_alns);
    fprintf(stderr,"Memory used: %ld MB\n",index_mem/1024/1024);  
    fprintf(stderr,"Updating entries with NH and printing BAM...\n");
    fflush(stderr);
  }
  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  num_alns=0;
  while(bam_read1(in2,aln)>=0) { // read alignment
    paired=1;
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FREAD2) paired=2;
    ++num_alns;
    READ_ALN *r=get_read_aln(ht,fix_read_name(bam1_qname(aln),paired));

    assert(r!=NULL);
    // update the NH field
    uint8_t *old_nh = bam_aux_get(aln, "NH");    
    int32_t nh=r->ctr;
    if (old_nh) {
      if (nh!=bam_aux2i(old_nh)) {
	fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh);
      }
      bam_aux_del(aln, old_nh);
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      //      printf("!>%s %d\n",bam1_qname(aln),r->ctr);
#endif
    }
    if (!old_nh) { // add NH  
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      fprintf(stderr,"!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh));
#endif
    }
    bam_write1(out,aln);
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Done.\n");
  }
  return 0;  
}  
Esempio n. 13
0
int bam_merge_core2(int by_qname, const char *out, const char *headers, int n, char * const *fn, int flag, const char *reg, int level)
#endif
{
	bamFile fpout, *fp;
	heap1_t *heap;
	bam_header_t *hout = 0;
	bam_header_t *hheaders = NULL;
	int i, j, *RG_len = 0;
	uint64_t idx = 0;
	char **RG = 0, mode[8];
	bam_iter_t *iter = 0;

	if (headers) {
		tamFile fpheaders = sam_open(headers);
		if (fpheaders == 0) {
			const char *message = strerror(errno);
			fprintf(stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
			return -1;
		}
		hheaders = sam_header_read(fpheaders);
		sam_close(fpheaders);
	}

	g_is_by_qname = by_qname;
	fp = (bamFile*)calloc(n, sizeof(bamFile));
	heap = (heap1_t*)calloc(n, sizeof(heap1_t));
	iter = (bam_iter_t*)calloc(n, sizeof(bam_iter_t));
	// prepare RG tag
	if (flag & MERGE_RG) {
		RG = (char**)calloc(n, sizeof(void*));
		RG_len = (int*)calloc(n, sizeof(int));
		for (i = 0; i != n; ++i) {
			int l = strlen(fn[i]);
			const char *s = fn[i];
			if (l > 4 && strcmp(s + l - 4, ".bam") == 0) l -= 4;
			for (j = l - 1; j >= 0; --j) if (s[j] == '/') break;
			++j; l -= j;
			RG[i] = calloc(l + 1, 1);
			RG_len[i] = l;
			strncpy(RG[i], s + j, l);
		}
	}
	// read the first
	for (i = 0; i != n; ++i) {
		bam_header_t *hin;
		fp[i] = bam_open(fn[i], "r");
		if (fp[i] == 0) {
			int j;
			fprintf(stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
			for (j = 0; j < i; ++j) bam_close(fp[j]);
			free(fp); free(heap);
			// FIXME: possible memory leak
			return -1;
		}
		hin = bam_header_read(fp[i]);
		if (i == 0) { // the first BAM
			hout = hin;
		} else { // validate multiple baf
			int min_n_targets = hout->n_targets;
			if (hin->n_targets < min_n_targets) min_n_targets = hin->n_targets;

			for (j = 0; j < min_n_targets; ++j)
				if (strcmp(hout->target_name[j], hin->target_name[j]) != 0) {
					fprintf(stderr, "[bam_merge_core] different target sequence name: '%s' != '%s' in file '%s'\n",
							hout->target_name[j], hin->target_name[j], fn[i]);
					return -1;
				}

			// If this input file has additional target reference sequences,
			// add them to the headers to be output
			if (hin->n_targets > hout->n_targets) {
				swap_header_targets(hout, hin);
				// FIXME Possibly we should also create @SQ text headers
				// for the newly added reference sequences
			}

			bam_header_destroy(hin);
		}
	}

	if (hheaders) {
		// If the text headers to be swapped in include any @SQ headers,
		// check that they are consistent with the existing binary list
		// of reference information.
		if (hheaders->n_targets > 0) {
			if (hout->n_targets != hheaders->n_targets) {
				fprintf(stderr, "[bam_merge_core] number of @SQ headers in '%s' differs from number of target sequences\n", headers);
				if (!reg) return -1;
			}
			for (j = 0; j < hout->n_targets; ++j)
				if (strcmp(hout->target_name[j], hheaders->target_name[j]) != 0) {
					fprintf(stderr, "[bam_merge_core] @SQ header '%s' in '%s' differs from target sequence\n", hheaders->target_name[j], headers);
					if (!reg) return -1;
				}
		}

		swap_header_text(hout, hheaders);
		bam_header_destroy(hheaders);
	}

	if (reg) {
		int tid, beg, end;
		if (bam_parse_region(hout, reg, &tid, &beg, &end) < 0) {
			fprintf(stderr, "[%s] Malformated region string or undefined reference name\n", __func__);
			return -1;
		}
		for (i = 0; i < n; ++i) {
			bam_index_t *idx;
			idx = bam_index_load(fn[i]);
			iter[i] = bam_iter_query(idx, tid, beg, end);
			bam_index_destroy(idx);
		}
	}

	for (i = 0; i < n; ++i) {
		heap1_t *h = heap + i;
		h->i = i;
		h->b = (bam1_t*)calloc(1, sizeof(bam1_t));
		if (bam_iter_read(fp[i], iter[i], h->b) >= 0) {
			h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam1_strand(h->b);
			h->idx = idx++;
		}
		else h->pos = HEAP_EMPTY;
	}
	if (flag & MERGE_UNCOMP) level = 0;
	else if (flag & MERGE_LEVEL1) level = 1;
	strcpy(mode, "w");
	if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9);
	if ((fpout = strcmp(out, "-")? bam_open(out, "w") : bam_dopen(fileno(stdout), "w")) == 0) {
		fprintf(stderr, "[%s] fail to create the output file.\n", __func__);
		return -1;
	}
	bam_header_write(fpout, hout);
	bam_header_destroy(hout);
#ifndef _PBGZF_USE 
	if (!(flag & MERGE_UNCOMP)) bgzf_mt(fpout, n_threads, 256);
#endif

	ks_heapmake(heap, n, heap);
	while (heap->pos != HEAP_EMPTY) {
		bam1_t *b = heap->b;
		if (flag & MERGE_RG) {
			uint8_t *rg = bam_aux_get(b, "RG");
			if (rg) bam_aux_del(b, rg);
			bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
		}
		bam_write1_core(fpout, &b->core, b->data_len, b->data);
		if ((j = bam_iter_read(fp[heap->i], iter[heap->i], b)) >= 0) {
			heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam1_strand(b);
			heap->idx = idx++;
		} else if (j == -1) {
			heap->pos = HEAP_EMPTY;
			free(heap->b->data); free(heap->b);
			heap->b = 0;
		} else fprintf(stderr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]);
		ks_heapadjust(heap, 0, n, heap);
	}

	if (flag & MERGE_RG) {
		for (i = 0; i != n; ++i) free(RG[i]);
		free(RG); free(RG_len);
	}
	for (i = 0; i != n; ++i) {
		bam_iter_destroy(iter[i]);
		bam_close(fp[i]);
	}
	bam_close(fpout);
	free(fp); free(heap); free(iter);
	return 0;
}
Esempio n. 14
0
int main(int argc, char *argv[])  
{  
  short out2stdout=0;
  bamFile in,in2; 
  bamFile out; 


  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_se_flag <in.bam> <out.bam or - for stdout>\n");  
    return 1;  
  }  
  // Open file and exit if error
  in = bam_open(argv[1], "rb");
  out2stdout = strcmp(argv[2], "-")? 0 : 1; 
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  if (!out2stdout) {
    fprintf(stderr,"bam_fix_se_flag version %s\n",VERSION);
    fprintf(stderr,"Processing %s\n",argv[1]);
  }

  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  num_alns=0;
  while(bam_read1(in2,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    if (aln->core.flag & BAM_FUNMAP) continue;
    if (aln->core.flag & BAM_FPAIRED ) { // PAIRED

    } else { //SE 
      //turn off the other pair related flags
      aln->core.flag&=~BAM_FPROPER_PAIR;
      aln->core.flag&=~BAM_FMUNMAP;
      aln->core.flag&=~BAM_FREAD1;
      aln->core.flag&=~BAM_FREAD2;
      fprintf(stderr, ".");  
    }
    bam_write1(out,aln);
    if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns);
    ++num_alns;
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  if(!out2stdout) {
    fprintf(stderr,"%s%lu\n",BACKLINE,num_alns);
    fprintf(stderr,"Done.\n");
  }
  return 0;  
}  
Esempio n. 15
0
samfile_t *samopen(const char *fn, const char *mode, const void *aux)
{
	samfile_t *fp;
	fp = (samfile_t*)calloc(1, sizeof(samfile_t));
	if (mode[0] == 'r') { // read
		fp->type |= TYPE_READ;
		if (mode[1] == 'b') { // binary
			fp->type |= TYPE_BAM;
			fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
			if (fp->x.bam == 0) goto open_err_ret;
			fp->header = bam_header_read(fp->x.bam);
		} else { // text
			fp->x.tamr = sam_open(fn);
			if (fp->x.tamr == 0) goto open_err_ret;
			fp->header = sam_header_read(fp->x.tamr);
			if (fp->header->n_targets == 0) { // no @SQ fields
				if (aux) { // check if aux is present
					bam_header_destroy(fp->header);
					fp->header = sam_header_read2((const char*)aux);
				}
				if (fp->header->n_targets == 0)
					fprintf(stderr, "[samopen] empty header.\n");
			} else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
		}
	} else if (mode[0] == 'w') { // write
		fp->header = bam_header_dup((const bam_header_t*)aux);
		if (mode[1] == 'b') { // binary
			fp->type |= TYPE_BAM;
			fp->x.bam = strcmp(fn, "-")? bam_open(fn, "w") : bam_dopen(fileno(stdout), "w");
			if (fp->x.bam == 0) goto open_err_ret;
			bam_header_write(fp->x.bam, fp->header);
		} else { // text
			// open file
			fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
			if (fp->x.tamr == 0) goto open_err_ret;
			// write header
			if (strstr(mode, "h")) {
				int i;
				bam_header_t *alt;
				// parse the header text 
				alt = bam_header_init();
				alt->l_text = fp->header->l_text; alt->text = fp->header->text;
				sam_header_parse(alt);
				alt->l_text = 0; alt->text = 0;
				// check if there are @SQ lines in the header
				if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
					if (alt->n_targets != fp->header->n_targets)
						fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
					fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
				} else { // then dump ->target_{name,len}
					for (i = 0; i < fp->header->n_targets; ++i)
						fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
				}
				bam_header_destroy(alt);
			}
		}
	}
	return fp;

open_err_ret:
	free(fp);
	return 0;
}
Esempio n. 16
0
int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam)
{
    BGZF *fp;
    uint8_t *buf;
    uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
    const int es=BGZF_EMPTY_BLOCK_SIZE;
    int i;

    fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
    if (fp == 0) {
        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
        return 1;
    }
    if (h) bam_header_write(fp, h);

    buf = (uint8_t*) malloc(BUF_SIZE);
    for(i = 0; i < nfn; ++i){
        BGZF *in;
        bam_header_t *old;
        int len,j;

        in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r");
        if (in == 0) {
            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
            return -1;
        }
        if (in->is_write) return -1;

        old = bam_header_read(in);
        if (h == 0 && i == 0) bam_header_write(fp, old);

        if (in->block_offset < in->block_length) {
            bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
            bgzf_flush(fp);
        }

        j=0;
        while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
            if(len<es){
                int diff=es-len;
                if(j==0) {
                    fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
                    return -1;
                }
                bgzf_raw_write(fp, ebuf, len);
                memcpy(ebuf,ebuf+len,diff);
                memcpy(ebuf+diff,buf,len);
            } else {
                if(j!=0) bgzf_raw_write(fp, ebuf, es);
                len-= es;
                memcpy(ebuf,buf+len,es);
                bgzf_raw_write(fp, buf, len);
            }
            j=1;
        }

        /* check final gzip block */
        {
            const uint8_t gzip1=ebuf[0];
            const uint8_t gzip2=ebuf[1];
            const uint32_t isize=*((uint32_t*)(ebuf+es-4));
            if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
                fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
                fprintf(stderr, " Possible output corruption.\n");
                bgzf_raw_write(fp, ebuf, es);
            }
        }
        bam_header_destroy(old);
        bgzf_close(in);
    }
    free(buf);
    bgzf_close(fp);
    return 0;
}
Esempio n. 17
0
int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam)
{
    BGZF *fp;
    FILE* fp_file;
    uint8_t *buf;
    uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
    const int es=BGZF_EMPTY_BLOCK_SIZE;
    int i;
    
    fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(_fileno(stdout), "w");
    if (fp == 0) {
        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __FUNCTION__, outbam);
        return 1;
    }
    if (h) bam_header_write(fp, h);
    
    buf = (uint8_t*) malloc(BUF_SIZE);
    for(i = 0; i < nfn; ++i){
        BGZF *in;
        bam_header_t *old;
        int len,j;
        
        in = strcmp(fn[i], "-")? bam_open(fn[i], "r") : bam_dopen(_fileno(stdin), "r");
        if (in == 0) {
            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __FUNCTION__, fn[i]);
            return -1;
        }
        if (in->open_mode != 'r') return -1;
        
        old = bam_header_read(in);
        if (h == 0 && i == 0) bam_header_write(fp, old);
        
        if (in->block_offset < in->block_length) {
            bgzf_write(fp, (uint8_t*)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
            bgzf_flush(fp);
        }
        
        j=0;
#ifdef _USE_KNETFILE
        fp_file=fp->x.fpw;
        while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) {
#else  
        fp_file=fp->file;
        while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) {
#endif
            if(len<es){
                int diff=es-len;
                if(j==0) {
                    fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __FUNCTION__, fn[i]);
                    return -1;
                }
                fwrite(ebuf, 1, len, fp_file);
                memcpy(ebuf,ebuf+len,diff);
                memcpy(ebuf+diff,buf,len);
            } else {
                if(j!=0) fwrite(ebuf, 1, es, fp_file);
                len-= es;
                memcpy(ebuf,buf+len,es);
                fwrite(buf, 1, len, fp_file);
            }
            j=1;
        }

        /* check final gzip block */
        {
            const uint8_t gzip1=ebuf[0];
            const uint8_t gzip2=ebuf[1];
            const uint32_t isize=*((uint32_t*)(ebuf+es-4));
            if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
                fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __FUNCTION__, fn[i]);
                fprintf(stderr, " Possible output corruption.\n");
                fwrite(ebuf, 1, es, fp_file);
            }
        }
        bam_header_destroy(old);
        bgzf_close(in);
    }
    free(buf);
    bgzf_close(fp);
    return 0;
}



int main_cat(int argc, char *argv[])
{
    bam_header_t *h = 0;
    char *outfn = 0;
    int c, ret;
    while ((c = getopt(argc, argv, "h:o:")) >= 0) {
        switch (c) {
            case 'h': {
                tamFile fph = sam_open(optarg);
                if (fph == 0) {
                    fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __FUNCTION__, argv[1]);
                    return 1;
                }
                h = sam_header_read(fph);
                sam_close(fph);
                break;
            }
            case 'o': outfn = strdup(optarg); break;
        }
    }
    if (argc - optind < 2) {
        fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]\n");
        return 1;
    }
    ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
    free(outfn);
    return ret;
}
Esempio n. 18
0
int main(int argc, char *argv[])  
{  
  hashtable ht=new_hashtable(HASHSIZE);
  bamFile in,in2; 
  bamFile out; 
  
  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam>\n");  
    return 1;  
  }  
  
  // Open file and exit if error
  //in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb");
  in = bam_open(argv[1], "rb");
  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (in == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  

  unsigned long num_alns=0;
  int ref;  

  // ***********
  // Copy header
  bam_header_t *header;
  header = bam_header_read(in);
  bam_header_write(out,header);

  // sorted by name?
  // Should not rely on the value in SO 
  bam1_t *aln=bam_init1();
  bam1_t *prev=bam_init1();

  printf("Hashing...\n");flush(stdout);
  while(bam_read1(in,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    ++num_alns;
    new_read_aln(ht,bam1_qname(aln));
  }
  bam_close(in);  
  printf("Hashing complete (%lu alignments)\n",num_alns);
  printf("Memory used in the hash: %ld MB\n",index_mem/1024/1024);  
  flush(stdout);
  // reopen
  in2 = bam_open(argv[1], "rb");
  if (in2 == 0 ) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]);  
    return 1;  
  }  

  header = bam_header_read(in2);
  
  while(bam_read1(in2,aln)>=0) { // read alignment
    if (aln->core.tid < 0) continue;//ignore unaligned reads
    ++num_alns;
    
    READ_ALN *r=get_read_aln(ht,bam1_qname(aln));

    //assert(r!=NULL);
    // update the NH field
    uint8_t *old_nh = bam_aux_get(aln, "NH");    
    uint8_t nh=r->ctr;
    if (old_nh) {
      if (nh!=bam_aux2i(old_nh)) {
	fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh);
      }
      bam_aux_del(aln, old_nh);
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
    }
    if (!old_nh) { // add NH  
      bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh);
#ifdef DEBUG
      printf("!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh));
#endif
    }
    // in->header
    // Also fix the XS:A tag
    // BAM_FREAD1
    // BAM_FREAD2
    // BAM_FREVERSE the read is mapped to the reverse strand 
    //bam1_cigar(b) 
      //BAM_CREF_SKIP 3 CIGAR skip on the reference (e.g. spliced alignment)
      //BAM_FREVERSE 16 the read is mapped to the reverse strand
    if (aln->core.flag & BAM_FSECONDARY) continue; // skip secondary alignments
    if (aln->core.flag & ! BAM_FPAIRED) continue; // not paired
    if (aln->core.flag & ! BAM_FPROPER_PAIR) continue; // not a proper pair
    if (aln->core.flag & ! BAM_FMUNMAP) continue; // the mate is mapped
    if (aln->core.flag & BAM_FSECONDARY) continue; // secundary read
    if (aln->core.flag & BAM_FREAD2) continue; // only count each pair once
    // core.strand == 0 (f/+) 1 r/-
    // flag
    // bam1_qname(b)
    bam_write1(out,aln);
  }
  // 
  bam_destroy1(aln);
  bam_close(in2);  
  bam_close(out);  
  return 0;  
/*
uint8_t *old_nm = bam_aux_get(b, "NM");
90 	if (c->flag & BAM_FUNMAP) return;
91 	if (old_nm) old_nm_i = bam_aux2i(old_nm);
92 	if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
93 	else if (nm != old_nm_i) {
94 	fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm);
95 	bam_aux_del(b, old_nm);
96 	bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
97 	}
*/
}  
samfile_t *samopen(const char *fn, const char *mode, const void *aux)
{
	samfile_t *fp;
	fp = (samfile_t*)calloc(1, sizeof(samfile_t));
	if (strchr(mode, 'r')) { // read
		fp->type |= TYPE_READ;
		if (strchr(mode, 'b')) { // binary
			fp->type |= TYPE_BAM;
			fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
			if (fp->x.bam == 0) goto open_err_ret;
			fp->header = bam_header_read(fp->x.bam);
		} else { // text
			fp->x.tamr = sam_open(fn);
			if (fp->x.tamr == 0) goto open_err_ret;
			fp->header = sam_header_read(fp->x.tamr);
			if (fp->header->n_targets == 0) { // no @SQ fields
				if (aux) { // check if aux is present
					bam_header_t *textheader = fp->header;
					fp->header = sam_header_read2((const char*)aux);
					if (fp->header == 0) goto open_err_ret;
					append_header_text(fp->header, textheader->text, textheader->l_text);
					bam_header_destroy(textheader);
				}
				if (fp->header->n_targets == 0 && bam_verbose >= 1)
					fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
			} //else if (bam_verbose >= 2) fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
		}
	} else if (strchr(mode, 'w')) { // write
		fp->header = bam_header_dup((const bam_header_t*)aux);
		if (strchr(mode, 'b')) { // binary
			char bmode[3];
			int i, compress_level = -1;
			for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break;
			if (mode[i]) compress_level = mode[i] - '0';
			if (strchr(mode, 'u')) compress_level = 0;
			bmode[0] = 'w'; bmode[1] = compress_level < 0? 0 : compress_level + '0'; bmode[2] = 0;
			fp->type |= TYPE_BAM;
			fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
			if (fp->x.bam == 0) goto open_err_ret;
			bam_header_write(fp->x.bam, fp->header);
		} else { // text
			// open file
			fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
			if (fp->x.tamw == 0) goto open_err_ret;
			if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2;
			else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2;
			else fp->type |= BAM_OFDEC<<2;
			// write header
			if (strchr(mode, 'h')) {
				int i;
				bam_header_t *alt;
				// parse the header text 
				alt = bam_header_init();
				alt->l_text = fp->header->l_text; alt->text = fp->header->text;
				sam_header_parse(alt);
				alt->l_text = 0; alt->text = 0;
				// check if there are @SQ lines in the header
				fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); // FIXME: better to skip the trailing NULL
				if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
					if (alt->n_targets != fp->header->n_targets && bam_verbose >= 1)
						fprintf(stderr, "[samopen] inconsistent number of target sequences. Output the text header.\n");
				} else { // then dump ->target_{name,len}
					for (i = 0; i < fp->header->n_targets; ++i)
						fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
				}
				bam_header_destroy(alt);
			}
		}
	}
	return fp;

open_err_ret:
	free(fp);
	return 0;
}
Esempio n. 20
0
// FIX MRNM and unaligned reads
int main(int argc, char *argv[])  
{  
  bamFile in; 
  long num_unmapped=0;
  long num_alns_pe=0;

  if (argc != 3) {  
    fprintf(stderr, "Usage: bam_tophat2_pe_fix <in.bam> <out.bam>\n");  
    return 1;  
  }  

  in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb"); 
  if (in == 0) {  
    fprintf(stderr, "ERROR: Fail to open input BAM file %s\n", argv[1]);  
    return 1;  
  }  
  

  int ref;  
  unsigned long num_alns=0;
  // counts
  unsigned long unalign_mapq_fix=0;
  unsigned long mtid_fix=0;
  unsigned long mpos_fix=0;
  bamFile out; 
  bam_header_t *header;
  header = bam_header_read(in);
  bam1_t *aln=bam_init1();

  out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); 
  if (out == 0) {  
    fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]);  
    return 1;  
  }  
  bam_header_write(out,header);

  while(bam_read1(in,aln)>=0) {
    ++num_alns;
    if (aln->core.tid < 0) { 
      // unaligned reads
      if ( aln->core.qual!=0 ) {
	//fprintf(stderr, "ERROR: Unaligned read with quality > 0 in line %lu\n",num_alns);
	aln->core.qual=0;
	unalign_mapq_fix++;
      }
    }
    //fprintf(stderr,"%s %c %d %d\n",bam1_qname(aln),(aln->core.tid<0?'U':'M'),aln->core.mtid,aln->core.mpos);
    if ( aln->core.flag & BAM_FPAIRED ) {
      //fprintf(stderr,"paired %d\n",(aln->core.flag & BAM_FMUNMAP));
      // paired
      if ( aln->core.mtid <0 && !(aln->core.flag & BAM_FMUNMAP) ) {
	aln->core.flag |= BAM_FMUNMAP;
	aln->core.mpos=-1;
	mtid_fix++;
      }
      if ( aln->core.mpos <0 && !(aln->core.flag & BAM_FMUNMAP) ) {
	aln->core.flag |= BAM_FMUNMAP;
	aln->core.mtid=-1;
	mpos_fix++;
      }
    }
    
    bam_write1(out,aln);

  }
  bam_destroy1(aln);
  bam_close(in);  
  bam_close(out);  
  // 
  fprintf(stderr,"unaligned MAPQ fixes: %lu\n",unalign_mapq_fix);
  fprintf(stderr,"unaligned mtid fixes: %lu\n",mtid_fix);
  fprintf(stderr,"unaligned mpos fixes: %lu\n",mpos_fix);
  return 0;  
}