Exemple #1
0
/*
 * Write the pileup "bases" text for one read at one reference position to
 * stdout.
 *
 *   p        pileup entry for the read
 *   pos      0-based reference position of the column
 *   ref_len  number of valid characters in ref
 *   ref      reference sequence, or NULL when no reference is available
 *
 * Output, in order: "^" plus the mapping quality character at a read start,
 * the base symbol ('.'/',' for a match, upper/lower case letter for a
 * mismatch, '*' for a deletion, '>'/'<' for a reference skip), an optional
 * "+<n><bases>" / "-<n><bases>" indel description, and "$" at a read end.
 * Lower case and ','/'<' are used for reverse-strand reads.
 */
static inline void pileup_seq(const bam_pileup1_t *p, int pos, int ref_len, const char *ref)
{
	const int on_reverse = bam1_strand(p->b);
	int k;

	if (p->is_head) {
		const int mapq = p->b->core.qual;
		putchar('^');
		/* mapping quality as a printable character, capped at '~' (126) */
		putchar(mapq > 93 ? 126 : mapq + 33);
	}

	if (p->is_del) {
		if (p->is_refskip) putchar(on_reverse ? '<' : '>');
		else putchar('*');
	} else {
		int base = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
		/* '=' always counts as a match; otherwise compare against the
		 * reference base when a reference was supplied */
		int same_as_ref = (base == '=');
		if (!same_as_ref && ref) {
			int ref_base = pos < ref_len ? ref[pos] : 'N';
			same_as_ref = (bam_nt16_table[base] == bam_nt16_table[ref_base]);
		}
		if (same_as_ref) base = on_reverse ? ',' : '.';
		else base = on_reverse ? tolower(base) : toupper(base);
		putchar(base);
	}

	if (p->indel > 0) {
		/* insertion: the inserted bases come from the read itself */
		putchar('+'); printw(p->indel, stdout);
		for (k = 1; k <= p->indel; ++k) {
			int ins = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + k)];
			putchar(on_reverse ? tolower(ins) : toupper(ins));
		}
	} else if (p->indel < 0) {
		/* deletion: the deleted bases come from the reference ('N' when
		 * unavailable); printw emits the leading '-' of the negative count */
		const int del_len = -p->indel;
		printw(p->indel, stdout);
		for (k = 1; k <= del_len; ++k) {
			int del = (ref && (int)pos+k < ref_len)? ref[pos+k] : 'N';
			putchar(on_reverse ? tolower(del) : toupper(del));
		}
	}

	if (p->is_tail) putchar('$');
}
Exemple #2
0
static int fetch_disc_read_callback(const bam1_t* alignment, void* data) {
    //    MEI_data* mei_data = static_cast<MEI_data*>(data);
    std::pair<MEI_data*, UserDefinedSettings*>* env = static_cast<std::pair<MEI_data*, UserDefinedSettings*>*>(data);
    MEI_data* mei_data = env->first;
    UserDefinedSettings* userSettings = env->second;
    if (!(alignment->core.flag & BAM_FUNMAP || alignment->core.flag & BAM_FMUNMAP) && // Both ends are mapped.
        !is_concordant(alignment, mei_data->current_insert_size) &&                   // Ends map discordantly.
        // Extra check for (very) large mapping distance.  This is done beside the check for read
        // discordance to speed up computation by ignoring signals from small structural variants.
        (alignment->core.tid != alignment->core.mtid ||
         abs(alignment->core.pos - alignment->core.mpos) > userSettings->MIN_DD_MAP_DISTANCE)) {
            
            // Save alignment as simple_read object.
            std::string read_name = enrich_read_name(bam1_qname(alignment), alignment->core.flag & BAM_FREAD1);
            char strand = bam1_strand(alignment)? Minus : Plus;
            char mate_strand = bam1_mstrand(alignment)? Minus : Plus;
            std::string read_group;
            get_read_group(alignment, read_group);
            std::string sample_name;
            get_sample_name(read_group, mei_data->sample_names, sample_name);
            
            simple_read* read = new simple_read(read_name, alignment->core.tid, alignment->core.pos, strand, sample_name,
                                                get_sequence(bam1_seq(alignment), alignment->core.l_qseq),
                                                alignment->core.mtid, alignment->core.mpos, mate_strand);
            mei_data->discordant_reads.push_back(read);
        }
    return 0;
}
Exemple #3
0
/**
 * Debug helper: dump the contents of a BAM record to stdout.
 *
 * @param bam_p         record to print
 * @param base_quality  offset added to every raw quality value before it is
 *                      printed as a character
 *
 * NOTE(review): the strings returned by convert_to_sequence_string() and
 * convert_to_cigar_string() are not released here - confirm who owns them.
 */
void bam_print(bam1_t* bam_p, int base_quality) {
    /* One output line per flag bit: the format string and the bit it tests. */
    static const struct {
        const char* format;
        int mask;
    } flag_lines[] = {
        {"flag (is_paired_end): %i\n", BAM_FPAIRED},
        {"flag (is_paired_end_mapped): %i\n", BAM_FPROPER_PAIR},
        {"flag (is_seq_unmapped): %i\n", BAM_FUNMAP},
        {"flag (is_mate_unmapped): %i\n", BAM_FMUNMAP},
        {"flag (seq_strand): %i\n", BAM_FREVERSE},
        {"flag (mate_strand): %i\n", BAM_FMREVERSE},
        {"flag (pair_num_1): %i\n", BAM_FREAD1},
        {"flag (pair_num_2): %i\n", BAM_FREAD2},
        {"flag (primary_alignment): %i\n", BAM_FSECONDARY},
        {"flag (fails_quality_check): %i\n", BAM_FQCFAIL},
        {"flag (pc_optical_duplicate): %i\n", BAM_FDUP}
    };
    const int num_flag_lines = (int) (sizeof(flag_lines) / sizeof(flag_lines[0]));
    const char* raw_quality = (char*) bam1_qual(bam_p);
    const char* aux_bytes = (char*) bam1_aux(bam_p);
    int k;

    printf("\n------------------------------------------------------------------->\n");
    printf("bam_p->data (qname): %s\n", bam1_qname(bam_p));
    printf("bam_p->data (seq):  %s\n", convert_to_sequence_string(bam1_seq(bam_p), bam_p->core.l_qseq));

    /* quality values, shifted into printable range by base_quality */
    printf("bam_p->data (qual): ");
    k = 0;
    while (k < bam_p->core.l_qseq) {
        printf("%c", (raw_quality[k] + base_quality));
        k++;
    }
    printf("\n");

    printf("bam_p->data (cigar): %s\n", convert_to_cigar_string(bam1_cigar(bam_p), bam_p->core.n_cigar));

    /* auxiliary (optional) data, printed byte by byte as characters */
    printf("bam_p->data (aux): ");
    k = 0;
    while (k < bam_p->l_aux) {
        printf("%c", aux_bytes[k]);
        k++;
    }
    printf("\n");

    /* buffer lengths */
    printf("bam_p->l_aux: %i\n", bam_p->l_aux);
    printf("bam_p->data_len: %i\n", bam_p->data_len);
    printf("bam_p->m_data: %i\n", bam_p->m_data);

    /* core fields */
    printf("bam_p->core.tid: %i\n", bam_p->core.tid);
    printf("bam_p->core.pos: %i\n", bam_p->core.pos);
    printf("bam_p->core.bin: %u\n", bam_p->core.bin);
    printf("bam_p->core.qual: %u\n", bam_p->core.qual);
    printf("bam_p->core.l_qname: %u\n", bam_p->core.l_qname);
    printf("bam_p->core.flag (16 bits): %u\n", bam_p->core.flag);
    printf("bam_p->core.n_cigar: %u\n", bam_p->core.n_cigar);
    printf("bam_p->core.l_qseq: %i\n", bam_p->core.l_qseq);
    printf("bam_p->core.mtid: %i\n", bam_p->core.mtid);
    printf("bam_p->core.mpos: %i\n", bam_p->core.mpos);
    printf("bam_p->core.isize: %i\n", bam_p->core.isize);

    /* individual flag bits, each reported as 0 or 1 */
    printf("\nbam1_t.core flags\n");
    printf("-----------------------\n");
    for (k = 0; k < num_flag_lines; k++) {
        printf(flag_lines[k].format, (bam_p->core.flag & flag_lines[k].mask) ? 1 : 0);
    }
}
Exemple #4
0
/**
 * Build a newly allocated alignment_t from a BAM record.
 *
 * @param bam_p         source BAM record
 * @param base_quality  offset passed to convert_to_quality_string_length()
 *                      when the quality string is produced
 * @return              the new alignment; every inner buffer is allocated
 *                      with calloc()
 *
 * NOTE(review): the strings returned by convert_to_sequence_string() and
 * convert_to_cigar_string() are copied but not released here - confirm
 * ownership of those buffers.
 */
alignment_t* alignment_new_by_bam(bam1_t* bam_p, int base_quality) {
    //memory allocation for the structure
    alignment_t* alignment_p = (alignment_t*) calloc(1, sizeof(alignment_t));

    //numeric data
    alignment_p->num_cigar_operations = (int) bam_p->core.n_cigar;
    alignment_p->chromosome = bam_p->core.tid;
    alignment_p->position = bam_p->core.pos;
    alignment_p->mate_chromosome = bam_p->core.mtid;
    alignment_p->mate_position = bam_p->core.mpos;
    alignment_p->map_quality = bam_p->core.qual;
    alignment_p->template_length = bam_p->core.isize;

    //memory allocation for inner fields according to indicated sizes
    alignment_p->query_name = (char*) calloc(bam_p->core.l_qname, sizeof(char));
    alignment_p->sequence = (char*) calloc(bam_p->core.l_qseq + 1, sizeof(char));
    alignment_p->quality = (char*) calloc(bam_p->core.l_qseq + 1, sizeof(char));   //same length as sequence
    alignment_p->optional_fields = (uint8_t*) calloc(bam_p->l_aux, sizeof(uint8_t));
    alignment_p->optional_fields_length = bam_p->l_aux;

    //copy the data between structures
    strcpy(alignment_p->query_name, bam1_qname(bam_p));
    strcpy(alignment_p->sequence, convert_to_sequence_string(bam1_seq(bam_p), bam_p->core.l_qseq));

    convert_to_quality_string_length(alignment_p->quality, bam1_qual(bam_p), bam_p->core.l_qseq, base_quality);

    //cigar: size the buffer from the rendered string. Four bytes per
    //operation is not enough once an operation length reaches four digits
    //(e.g. "1000M" needs five characters plus the terminator), so the former
    //fixed estimate could be overrun by strcpy(). The old estimate is kept as
    //a lower bound so the buffer is never smaller than before.
    {
        char* cigar_string = convert_to_cigar_string(bam1_cigar(bam_p), alignment_p->num_cigar_operations);
        size_t cigar_capacity = (size_t) max(MIN_ALLOCATED_SIZE_FOR_CIGAR_STRING, alignment_p->num_cigar_operations << 2);
        size_t cigar_needed = strlen(cigar_string) + 1;

        if (cigar_capacity < cigar_needed) {
            cigar_capacity = cigar_needed;
        }
        alignment_p->cigar = (char*) calloc(cigar_capacity, sizeof(char));
        strcpy(alignment_p->cigar, cigar_string);
    }

    memcpy(alignment_p->optional_fields, bam1_aux(bam_p), bam_p->l_aux);

    //flags
    uint32_t flag = (uint32_t) bam_p->core.flag;
    alignment_p->is_paired_end = (flag & BAM_FPAIRED) ? 1 : 0;
    alignment_p->is_paired_end_mapped = (flag & BAM_FPROPER_PAIR) ? 1 : 0;
    alignment_p->is_seq_mapped = (flag & BAM_FUNMAP) ? 0 : 1; //in bam structure is negative flag!!!
    alignment_p->is_mate_mapped = (flag & BAM_FMUNMAP) ? 0 : 1; //in bam structure is negative flag!!!
    alignment_p->seq_strand = (flag & BAM_FREVERSE) ? 1 : 0;
    alignment_p->mate_strand = (flag & BAM_FMREVERSE) ? 1 : 0;

    //READ1 takes precedence when both pair bits are set; 0 means neither is set
    if (flag & BAM_FREAD1) {
        alignment_p->pair_num = 1;
    } else if (flag & BAM_FREAD2) {
        alignment_p->pair_num = 2;
    } else {
        alignment_p->pair_num = 0;
    }

    //NOTE(review): primary_alignment is set to 1 when BAM_FSECONDARY is set,
    //which looks inverted with respect to the field name. Left unchanged
    //because callers may rely on it - confirm the intended meaning.
    alignment_p->primary_alignment = (flag & BAM_FSECONDARY) ? 1 : 0;
    alignment_p->fails_quality_check = (flag & BAM_FQCFAIL) ? 1 : 0;
    alignment_p->pc_optical_duplicate = (flag & BAM_FDUP) ? 1 : 0;

    return alignment_p;
}
Exemple #5
0
/*!
 @abstract     Get the color error profile at the given position
 @param b      pointer to an alignment
 @param i      position in the read sequence stored in the alignment
 @return       the original color if the color was an error, '-' (dash) otherwise

 @discussion   Returns 0 if no color information (the "CS" tag) is found.
 */
char bam_aux_getCEi(bam1_t *b, int i)
{
	uint8_t *tag = bam_aux_get(b, "CS");
	char *colors;
	char base_before, base_here;
	char observed, expected;
	int color_idx;

	// no "CS" tag: nothing to compare against
	if (tag == 0) return 0;

	colors = bam_aux2Z(tag);

	// locate the color and the preceding base, adjusting for strandedness
	// and for the leading adaptor character in colors[0]
	if (bam1_strand(b)) { // reverse strand: colors are indexed from the end
		color_idx = strlen(colors) - 1 - i;
		observed = colors[color_idx];
		// the adaptor must be complemented when it serves as previous base
		if (color_idx == 1) base_before = "TGCAN"[(int)bam_aux_nt2int(colors[0])];
		else base_before = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)];
	}
	else { // forward strand: the color for base i follows the adaptor
		color_idx = i + 1;
		observed = colors[color_idx];
		if (i == 0) base_before = colors[0];
		else base_before = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)];
	}
	base_here = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)];

	// color implied by the two neighbouring bases
	expected = bam_aux_ntnt2cs(base_before, base_here);

	return (observed == expected) ? '-' : observed;
}
Exemple #6
0
/*
 * Read up to n_needed query sequences from the BAM stream in bs.
 *
 *   bs         input stream; bs->which is a bit mask selecting read 1 (bit 1),
 *              read 2 (bit 2) and records flagged as neither (bit 4)
 *   n_needed   maximum number of sequences to return
 *   n          receives the number of sequences actually read
 *   is_comp    forwarded to seq_reverse() when rseq is prepared
 *   trim_qual  quality trimming is applied through bwa_trim_read() when >= 1
 *
 * Returns a calloc'ed array of *n sequences, or 0 when none were read.
 */
static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual)
{
	bwa_seq_t *seqs;
	long n_trimmed = 0, n_tot = 0;
	int n_seqs = 0, res;
	bam1_t *b = bam_init1();

	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	while ((res = bam_read1(bs->fp, b)) >= 0) {
		const int is_read1 = (b->core.flag & BAM_FREAD1) != 0;
		const int is_read2 = (b->core.flag & BAM_FREAD2) != 0;
		const int wanted = ((bs->which & 1) && is_read1)
			|| ((bs->which & 2) && is_read2)
			|| ((bs->which & 4) && !is_read1 && !is_read2);
		bwa_seq_t *p;
		uint8_t *s, *q;
		int i, l;

		if (!wanted) continue;

		l = b->core.l_qseq;
		p = &seqs[n_seqs++];
		p->tid = -1; // not assigned to a thread
		p->qual = 0;
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;

		s = bam1_seq(b);
		q = bam1_qual(b);
		p->seq = (ubyte_t*)calloc(p->len + 1, 1);
		p->qual = (ubyte_t*)calloc(p->len + 1, 1);
		for (i = 0; i != p->full_len; ++i) {
			const int shifted = q[i] + 33; // quality as a printable character
			p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)];
			p->qual[i] = shifted < 126 ? shifted : 126;
		}
		if (bam1_strand(b)) { // reverse-strand record: undo the reversal
			seq_reverse(p->len, p->seq, 1);
			seq_reverse(p->len, p->qual, 0);
		}
		if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);

		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
		p->name = strdup((const char*)bam1_qname(b));

		if (n_seqs == n_needed) break;
	}
	// -1 is accepted as the normal end of input; anything lower is fatal
	if (res < 0 && res != -1) err_fatal_simple("Error reading bam file");

	*n = n_seqs;
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);

	bam_destroy1(b);
	if (n_seqs == 0) {
		free(seqs);
		return 0;
	}
	return seqs;
}
Exemple #7
0
char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
{
	uint8_t *s = bam1_seq(b), *t = bam1_qual(b);
	int i;
	const bam1_core_t *c = &b->core;
	kstring_t str;
	str.l = str.m = 0; str.s = 0;

	ksprintf(&str, "%s\t", bam1_qname(b));
	if (of == BAM_OFDEC) ksprintf(&str, "%d\t", c->flag);
	else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag);
	else { // BAM_OFSTR
		for (i = 0; i < 16; ++i)
			if ((c->flag & 1<<i) && bam_flag2char_table[i])
				kputc(bam_flag2char_table[i], &str);
		kputc('\t', &str);
	}
	if (c->tid < 0) kputs("*\t", &str);
	else ksprintf(&str, "%s\t", header->target_name[c->tid]);
	ksprintf(&str, "%d\t%d\t", c->pos + 1, c->qual);
	if (c->n_cigar == 0) kputc('*', &str);
	else {
		for (i = 0; i < c->n_cigar; ++i)
			ksprintf(&str, "%d%c", bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, "MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK]);
	}
	kputc('\t', &str);
	if (c->mtid < 0) kputs("*\t", &str);
	else if (c->mtid == c->tid) kputs("=\t", &str);
	else ksprintf(&str, "%s\t", header->target_name[c->mtid]);
	ksprintf(&str, "%d\t%d\t", c->mpos + 1, c->isize);
	if (c->l_qseq) {
		for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
		kputc('\t', &str);
		if (t[0] == 0xff) kputc('*', &str);
		else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
	} else ksprintf(&str, "*\t*");
	s = bam1_aux(b);
	while (s < b->data + b->data_len) {
		uint8_t type, key[2];
		key[0] = s[0]; key[1] = s[1];
		s += 2; type = *s; ++s;
		ksprintf(&str, "\t%c%c:", key[0], key[1]);
		if (type == 'A') { ksprintf(&str, "A:%c", *s); ++s; }
		else if (type == 'C') { ksprintf(&str, "i:%u", *s); ++s; }
		else if (type == 'c') { ksprintf(&str, "i:%d", *(int8_t*)s); ++s; }
		else if (type == 'S') { ksprintf(&str, "i:%u", *(uint16_t*)s); s += 2; }
		else if (type == 's') { ksprintf(&str, "i:%d", *(int16_t*)s); s += 2; }
		else if (type == 'I') { ksprintf(&str, "i:%u", *(uint32_t*)s); s += 4; }
		else if (type == 'i') { ksprintf(&str, "i:%d", *(int32_t*)s); s += 4; }
		else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
		else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
		else if (type == 'Z' || type == 'H') { ksprintf(&str, "%c:", type); while (*s) kputc(*s++, &str); ++s; }
	}
	return str.s;
}
Exemple #8
0
        std::string getSeq() const
        {
            assert(m_dataPtr);
            uint8_t *bam_seq = bam1_seq(m_dataPtr);
            std::ostringstream seq;

            if (bam_seq)
            {
                size_t qlen = m_dataPtr->core.l_qseq;
                for (size_t i=0; i<qlen; i++)
                    seq << bam_nt16_rev_table[bam1_seqi(bam_seq, i)];
            }
            return seq.str();
        }
Exemple #9
0
	/// Return the query sequence as a NUL-terminated character array.
	///
	/// The array is decoded on the first call and stored in `seqs`; later
	/// calls return the stored pointer. When the record exposes no sequence
	/// pointer, `seqs` is returned unchanged.
	/// NOTE(review): `seqs` is allocated with new[] here - presumably it is
	/// released elsewhere in the class; confirm.
	char *getSeqArr() 
	{
	    if (seqs) return seqs;
	    assert(m_dataPtr);
	    uint8_t *bam_seq = bam1_seq(m_dataPtr);

	    if (bam_seq)
	    {
	        size_t qlen = m_dataPtr->core.l_qseq;
	        seqs = new char[qlen+1];
	        for (size_t i=0; i<qlen; i++)
	            seqs[i] =  bam_nt16_rev_table[bam1_seqi(bam_seq, i)];
	        // The extra byte was reserved for the terminator but never
	        // written, leaving the array unusable as a C string.
	        seqs[qlen] = '\0';
	    }
	    return seqs;
	}
Exemple #10
0
/*
 * Collect depth and quality statistics for one pileup column.
 *
 *   buf           pileup entries of the column
 *   n_reads       number of entries in buf
 *   ref_base      reference base in the same 4-bit encoding as bam1_seqi()
 *   wanted_bases  bit mask of the bases whose quality sums are accumulated
 *   dqs           output; cleared first, then filled
 *
 * Deleted positions and unmapped reads are skipped.  dp4[] is indexed as
 * (0 for a reference match, 2 otherwise) + (strand as given by bam1_strand()).
 */
void get_dqstats(
        const bam_pileup1_t* buf,
        int n_reads,
        int ref_base,
        uint32_t wanted_bases,
        dqstats_t *dqs
        )
{
    int read_idx, bit;

    memset(dqs, 0, sizeof(dqstats_t));

    for (read_idx = 0; read_idx < n_reads; ++read_idx) {
        const bam_pileup1_t *entry = &buf[read_idx];
        int base, strand;

        if (entry->is_del || entry->b->core.flag&BAM_FUNMAP)
            continue;

        ++dqs->total_depth;
        dqs->total_mean_mapQ += entry->b->core.qual;

        base = bam1_seqi(bam1_seq(entry->b), entry->qpos);
        strand = bam1_strand(entry->b);
        ++dqs->dp4[(base == ref_base ? 0 : 2) + strand];

        /* per-base tallies: slot `bit` is counted when the code has no bits
         * set outside 1 << bit */
        for (bit = 0; bit < 4; ++bit) {
            const int mask = 1 << bit;
            if ((base & mask) != base)
                continue;
            ++dqs->base_occ[bit];
            if (mask & wanted_bases) {
                dqs->mean_baseQ[bit] += bam1_qual(entry->b)[entry->qpos];
                dqs->mean_mapQ[bit] += entry->b->core.qual;
            }
        }
    }

    /* turn the accumulated sums into means (with the .499 offset) */
    for (bit = 0; bit < 4; ++bit) {
        if (dqs->base_occ[bit] <= 0)
            continue;
        dqs->mean_baseQ[bit] = dqs->mean_baseQ[bit]/(double)dqs->base_occ[bit] + .499;
        dqs->mean_mapQ[bit] = dqs->mean_mapQ[bit]/(double)dqs->base_occ[bit] + .499;
    }

    if (dqs->total_depth > 0)
        dqs->total_mean_mapQ = dqs->total_mean_mapQ / (double)dqs->total_depth + .499;
}
Exemple #11
0
void bamUnpackQuerySequence(const bam1_t *bam, boolean useStrand, char *qSeq)
/* Decode the packed nucleotide sequence of bam into qSeq as a NUL-terminated
 * string; qSeq must have room for l_qseq + 1 characters.  BAM stores the
 * query reverse-complemented when the alignment is on the - strand, so when
 * useStrand is set the decoded text is reverse-complemented again to recover
 * the original query sequence. */
{
    const int seqLen = bam->core.l_qseq;
    uint8_t *packed = bam1_seq(bam);
    int ix = 0;
    while (ix < seqLen)
        {
        qSeq[ix] = bam_nt16_rev_table[bam1_seqi(packed, ix)];
        ix++;
        }
    qSeq[ix] = '\0';
    if (useStrand && bamIsRc(bam))
        reverseComplement(qSeq, seqLen);
}
Exemple #12
0
/* Decode the packed sequence of a BAM record into a newly allocated,
 * NUL-terminated string.  The text is reverse-complemented when
 * bd->reverseComplement is set and the record is on the reverse strand.
 * The buffer comes from Calloc(). */
static char *_bamseq(const bam1_t * bam, BAM_DATA bd)
{
    /* 4-bit code -> character; the literal's trailing NUL is never indexed */
    static const char code_to_char[] = "-ACMGRSVTWYHKDBN";

    const uint32_t n_bases = bam->core.l_qseq;
    const unsigned char *packed = bam1_seq(bam);
    char *decoded = Calloc(n_bases + 1, char);
    uint32_t pos = 0;

    while (pos < n_bases) {
        decoded[pos] = code_to_char[bam1_seqi(packed, pos)];
        ++pos;
    }
    if (bd->reverseComplement && (bam1_strand(bam) == 1))
        _reverseComplement(decoded, n_bases);
    decoded[n_bases] = '\0';
    return decoded;
}
Exemple #13
0
/*
 * Read the next record from samio into sam.
 *
 * Any record previously held in sam->b is destroyed and replaced.  On
 * success the name, sequence and quality strings of sam are refreshed
 * (reverse-complemented when the record carries BAM_FREVERSE) and 1 is
 * returned.  Returns -1 when the optional end offset has been reached or
 * when samread() does not deliver a record.
 */
int32_t
tmap_sam_io_read(tmap_sam_io_t *samio, tmap_sam_t *sam)
{
  char *qname;
  int32_t pos, n;

  if(NULL != sam->b) {
      bam_destroy1(sam->b);
  }
  sam->b = bam_init1();

  // stop once the optional end bam virtual file offset has been passed
  if (samio->bam_end_vfo > 0) {
      BGZF* bgzf_fp = samio->fp->x.bam;
      if (bam_tell(bgzf_fp) >= samio->bam_end_vfo) {
         fprintf(stderr, "stopping at bam virtual file offset %lu\n", samio->bam_end_vfo);
         return -1;
      }
  }

  if(samread(samio->fp, sam->b) <= 0) {
      return -1;
  }

  // name
  qname = bam1_qname(sam->b);
  n = strlen(qname);
  tmap_sam_io_update_string(&sam->name, qname, n);
  sam->name->s[n] = '\0';

  // seq and qual: decode the packed bases and convert each raw quality
  // value to its character form
  n = sam->b->core.l_qseq;
  tmap_sam_io_update_string(&sam->seq, NULL, n);
  tmap_sam_io_update_string(&sam->qual, (char*)bam1_qual(sam->b), n);
  for(pos=0;pos<n;pos++) {
      sam->seq->s[pos] = bam_nt16_rev_table[bam1_seqi(bam1_seq(sam->b), pos)];
      sam->qual->s[pos] = QUAL2CHAR(sam->qual->s[pos]);
  }
  sam->seq->s[n] = sam->qual->s[n] = '\0';

  // reverse complement if necessary
  if((sam->b->core.flag & BAM_FREVERSE)) {
      tmap_sam_reverse_compliment(sam);
  }
  return 1;
}
Exemple #14
0
/* callback for bam_fetch(): print "<name>\t<sequence>\t<qualities>\n" for one
 * record, with qualities shifted by 33 into printable characters.  Always
 * returns 0. */
static int fetch_func(const bam1_t *b)
{
    const int n_bases = b->core.l_qseq;
    char *packed, *phred, *seq_text, *qual_text;
    int k;

    printf("%s\t", (char*) bam1_qname(b));

    /* decode the packed bases */
    seq_text = (char*)malloc(n_bases+1);
    packed = (char*) bam1_seq(b);
    k = 0;
    while (k < n_bases) {
        seq_text[k] = bam_nt16_rev_table[bam1_seqi(packed,k)];
        k++;
    }
    seq_text[k] = 0;
    printf("%s\t", seq_text);

    /* shift the raw qualities into printable range */
    qual_text = (char*)malloc(n_bases+1);
    phred = (char*) bam1_qual(b);
    k = 0;
    while (k < n_bases) {
        qual_text[k] = phred[k]+33;
        k++;
    }
    qual_text[k] = 0;
    printf("%s\n", qual_text);

    free(seq_text);
    free(qual_text);
    return 0;
}
Exemple #15
0
/**
 * Decode the nucleotides of a bam1 record into a caller-supplied buffer.
 *
 * At most max_l characters are written.  Codes 1, 2, 4 and 8 become 'A',
 * 'C', 'G' and 'T'; every other code becomes 'N'.  A terminating NUL is
 * appended only when the buffer is larger than the number of decoded bases,
 * so a sequence that fills the buffer is left unterminated.
 *
 * Always returns NO_ERROR.
 */
ERROR_CODE
new_sequence_from_bam_ref(bam1_t *bam1, char *seq, uint32_t max_l)
{
	char *packed = (char *)bam1_seq(bam1);
	int n_bases = bam1->core.l_qseq;
	int pos = 0;

	// never write more than the caller's buffer can hold
	if(n_bases > max_l)
		n_bases = max_l;

	// nucleotide content
	while (pos < n_bases) {
		const int code = bam1_seqi(packed, pos);
		char nt;

		if (code == 1)
			nt = 'A';
		else if (code == 2)
			nt = 'C';
		else if (code == 4)
			nt = 'G';
		else if (code == 8)
			nt = 'T';
		else
			nt = 'N';	// 15 as well as any ambiguity code

		seq[pos] = nt;
		pos++;
	}

	if(max_l > n_bases)
		seq[pos] = '\0';

	return NO_ERROR;
}
Exemple #16
0
/*
 * Expand the sequence of a padded-alignment record into s, one 4-bit base
 * code per reference-consuming position:
 *   - M operations copy the corresponding read bases,
 *   - D operations emit 0 for every deleted position,
 *   - S operations skip the clipped read bases.
 * Any other operation trips the assertion.  s->l is set to the number of
 * codes written.
 */
static void unpad_seq(bam1_t *b, kstring_t *s)
{
	int k, j, i, length;
	uint32_t *cigar = bam1_cigar(b);
	uint8_t *seq = bam1_seq(b);

	/* The output holds one entry per M and D position, which can exceed
	 * l_qseq when the record contains deletions, so size the buffer from the
	 * CIGAR (never below the previous l_qseq-based size). */
	for (k = 0, length = 0; k < b->core.n_cigar; ++k) {
		int op = bam_cigar_op(cigar[k]);
		if (op == BAM_CMATCH || op == BAM_CDEL)
			length += bam_cigar_oplen(cigar[k]);
	}
	if (length < b->core.l_qseq) length = b->core.l_qseq;
	ks_resize(s, length);

	for (k = 0, s->l = 0, j = 0; k < b->core.n_cigar; ++k) {
		int op, ol;
		op = bam_cigar_op(cigar[k]);
		ol = bam_cigar_oplen(cigar[k]);
		assert(op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CSOFT_CLIP);
		if (op == BAM_CMATCH) {
			/* advance the read index with every copied base; it used to
			 * advance once per operation, repeating one base ol times */
			for (i = 0; i < ol; ++i, ++j) s->s[s->l++] = bam1_seqi(seq, j);
		} else if (op == BAM_CSOFT_CLIP) {
			j += ol;
		} else {
			for (i = 0; i < ol; ++i) s->s[s->l++] = 0;
		}
	}
}
/* Decode the query sequence of <sam_alignment> into the caller-owned buffer
   <*seq_buffer>, converting each base with bambase2gtbase() and appending a
   terminating '\0'.  When <*bufsize> is smaller than the query length the
   buffer is reallocated to hold query length + 1 elements and <*bufsize> is
   set to the query length. */
void gt_sam_alignment_sequence_external_buffer(GtSamAlignment *sam_alignment,
    GtUchar **seq_buffer, unsigned long *bufsize)
{
  const unsigned long n_bases =
    (unsigned long) sam_alignment->s_alignment->core.l_qseq;
  uint8_t *packed;
  unsigned long pos;

  if (n_bases > (*bufsize)) {
    (*seq_buffer) = gt_realloc(*seq_buffer, sizeof (**seq_buffer) *
                                   (n_bases + 1UL));
    (*bufsize) = n_bases;
  }
  gt_assert((*seq_buffer) != NULL);

  packed = bam1_seq(sam_alignment->s_alignment);

  pos = 0UL;
  while (pos < n_bases) {
    (*seq_buffer)[pos] = bambase2gtbase((uint8_t) bam1_seqi(packed, pos),
                                        sam_alignment->alphabet);
    pos++;
  }
  (*seq_buffer)[n_bases] = (GtUchar)'\0';
}
Exemple #18
0
// Mostly stolen from bwa_read_bam.
//
// Fill the bwa_seq_t `p` from the BAM record `raw`: bases are converted with
// bam_nt16_nt4_table, qualities are shifted by 33 and capped at 126, and a
// reverse-strand record is reversed first.  Optional quality trimming runs
// when trim_qual >= 1.  `is_comp` is forwarded to seq_reverse() when rseq is
// prepared.  The read name is not copied; it stays available in the original
// record.
void bam1_to_seq(bam1_t *raw, bwa_seq_t *p, int is_comp, int trim_qual)
{
    const uint8_t *packed = bam1_seq(raw);
    const uint8_t *phred = bam1_qual(raw);
    const int n_bases = raw->core.l_qseq;
    int k;

    p->tid = -1; // not assigned to a thread
    p->qual = 0;
    p->full_len = p->clip_len = p->len = n_bases;

    p->seq = (ubyte_t*)calloc(p->len + 1, 1);
    p->qual = (ubyte_t*)calloc(p->len + 1, 1);
    for (k = 0; k != p->full_len; ++k) {
        const int shifted = phred[k] + 33; // quality as a printable character
        p->seq[k] = bam_nt16_nt4_table[(int)bam1_seqi(packed, k)];
        p->qual[k] = shifted < 126 ? shifted : 126;
    }

    if (bam1_strand(raw)) { // reverse-strand record: undo the reversal
        seq_reverse(p->len, p->seq, 1);
        seq_reverse(p->len, p->qual, 0);
    }

    // The number of trimmed bases is not tallied here; there is no place to
    // report it.
    if (trim_qual >= 1) bwa_trim_read(trim_qual, p);

    p->rseq = (ubyte_t*)calloc(p->full_len, 1);
    memcpy(p->rseq, p->seq, p->len);
    seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
    seq_reverse(p->len, p->rseq, is_comp);
    p->max_entries = 0 ;
}
Exemple #19
0
/**
 * Get string containing bam1 sequence nucleotides.
 *
 * Codes 1, 2, 4 and 8 are decoded as 'A', 'C', 'G' and 'T'; every other code
 * is decoded as 'N'.
 *
 * @param bam1  record whose sequence is decoded
 * @return      newly malloc'ed, NUL-terminated string of l_qseq characters
 *              that the caller must free, or NULL when allocation fails
 */
char *
new_sequence_from_bam(bam1_t *bam1)
{
	char *seq;
	char *bam_seq = (char *)bam1_seq(bam1);
	int seq_len = bam1->core.l_qseq;

	// one extra byte for the terminator: the buffer used to be exactly
	// seq_len bytes, so the result was not a valid C string
	seq = (char *) malloc(((size_t) seq_len + 1) * sizeof(char));
	if (seq == NULL)
		return NULL;

	// nucleotide content
	for (int i = 0; i < seq_len; i++) {
		switch (bam1_seqi(bam_seq, i))
		{
		case 1:
			seq[i] = 'A';
			break;
		case 2:
			seq[i] = 'C';
			break;
		case 4:
			seq[i] = 'G';
			break;
		case 8:
			seq[i] = 'T';
			break;
		default:	// 15 and every ambiguity code
			seq[i] = 'N';
			break;
		}
	}
	seq[seq_len] = '\0';

	return seq;
}
Exemple #20
0
/*
 * Accumulate per-sample, per-base quality sums and counts.
 *
 *   n    n[j] is the number of pileup entries of sample j
 *   plp  plp[j] points to the pileup entries of sample j
 *   ma   ma->qsum and ma->bcnt (4 slots per sample, ma->n samples) are
 *        cleared and refilled
 *
 * An entry is skipped when it is a deletion, when the read is unmapped, when
 * min(base quality, mapping quality) is below MC_MIN_QUAL, or when the base
 * does not map to one of the four slots.  Returns the number of entries
 * counted.
 */
static int sum_err(int *n, const bam_pileup1_t **plp, mc_aux_t *ma)
{
	int sample, read, n_used = 0;

	memset(ma->qsum, 0, sizeof(int) * 4 * ma->n);
	memset(ma->bcnt, 0, sizeof(int) * 4 * ma->n);

	for (sample = 0; sample < ma->n; ++sample) {
		int *qsum = &ma->qsum[sample * 4];
		int *bcnt = &ma->bcnt[sample * 4];

		for (read = 0; read < n[sample]; ++read) {
			const bam_pileup1_t *p = &plp[sample][read];
			int qual, slot;

			if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue;

			// effective quality: the smaller of base and mapping quality
			qual = bam1_qual(p->b)[p->qpos];
			if (p->b->core.qual < qual) qual = p->b->core.qual;
			if (qual < MC_MIN_QUAL) continue; // small qual

			slot = bam_nt16_nt4_table[(int)bam1_seqi(bam1_seq(p->b), p->qpos)];
			if (slot > 3) continue; // N

			qsum[slot] += qual;
			++bcnt[slot];
			++n_used;
		}
	}
	return n_used;
}
Exemple #21
0
/**
 * Compute the statistics of a single BAM record.
 *
 * @param bam1  record to analyse
 * @param opts  options; when opts->region_table is set, only records
 *              overlapping a region found by find_region() are analysed
 * @return      a new bam_stats_t, or NULL when a region table is given and
 *              the record lies outside every region.  For an unmapped record
 *              only the `mapped` field (0) is set explicitly.
 */
bam_stats_t *bam1_stats(bam1_t *bam1, bam_stats_options_t *opts) {
  
  bam_stats_t *bam_stats = NULL;
  uint32_t bam_flag = (uint32_t) bam1->core.flag;

  if (bam_flag & BAM_FUNMAP) {
    // not mapped, then return
    bam_stats = bam_stats_new();
    bam_stats->mapped = 0;
    return bam_stats;
  }

  if (opts->region_table) {
    // region filter: the record must fall inside a requested region
    region_t region;
    region.chromosome = opts->sequence_labels[bam1->core.tid];
    region.start_position = bam1->core.pos;
    region.end_position = region.start_position + bam1->core.l_qseq;
    region.strand = NULL;
    region.type = NULL;
    
    if (find_region(&region, opts->region_table)) {
      bam_stats = bam_stats_new();
    } else {
      return NULL;
    }
  } else {
    bam_stats = bam_stats_new();
  }

  // mapped !!
  bam_stats->mapped = 1;
  
  bam_stats->strand = (int) ((bam_flag & BAM_FREVERSE) > 0);
  
  // number of errors
  bam_stats->num_errors = bam_aux2i(bam_aux_get(bam1, "NM"));
  
  // cigar handling: number of indels and length
  uint32_t cigar_int, *cigar = bam1_cigar(bam1);
  int num_cigar_ops = (int) bam1->core.n_cigar; 
  for (int j = 0; j < num_cigar_ops; j++) {
    cigar_int = cigar[j];
    switch (cigar_int & BAM_CIGAR_MASK) {
    case BAM_CINS:  //I: insertion to the reference
    case BAM_CDEL:  //D: deletion from the reference
      bam_stats->num_indels++;
      bam_stats->indels_length += (cigar_int >> BAM_CIGAR_SHIFT);
      break;
    }
  }

  // quality
  bam_stats->quality = bam1->core.qual;

  // unique alignment
  if (!(bam_flag & BAM_FSECONDARY)) {
    bam_stats->unique_alignment = 1;
  }

  // handling pairs
  // Unmapped records returned early above, so the former unmapped_pair_1 /
  // unmapped_pair_2 branch and the repeated BAM_FUNMAP test could never run
  // and were removed.
  // NOTE(review): if unmapped pairs are meant to be tallied, that has to
  // happen in the early-return path - confirm the intent.
  bam_stats->single_end = 1;
  if (bam_flag & BAM_FPAIRED) {
    bam_stats->single_end = 0;    
    if (bam_flag & BAM_FREAD1) {
      bam_stats->mapped_pair_1 = 1;
    } else {
      bam_stats->mapped_pair_2 = 1;
    }
    
    // insert size only for properly paired reads whose mate is mapped
    if (!(bam_flag & BAM_FMUNMAP) && (bam_flag & BAM_FPROPER_PAIR)) { 
      bam_stats->isize = abs(bam1->core.isize);
    }
  }

  // mapping length
  // explicit cast, matching how the sibling decoders obtain the sequence
  char *bam_seq = (char *) bam1_seq(bam1);
  int seq_len = bam1->core.l_qseq;
  bam_stats->seq_length = seq_len;

  // nucleotide content (ambiguity codes other than 15 are not counted)
  for (int i = 0; i < seq_len; i++) {
    switch (bam1_seqi(bam_seq, i)) {
    case 1:
      bam_stats->num_As++;
      break;
    case 2:
      bam_stats->num_Cs++;
      break;
    case 4:
      bam_stats->num_Gs++;
      break;
    case 8:
      bam_stats->num_Ts++;
      break;
    case 15:
      bam_stats->num_Ns++;
      break;
    }
  }
  bam_stats->num_GCs = bam_stats->num_Gs + bam_stats->num_Cs;

  return bam_stats;
}
Exemple #22
0
static int mpileup(mplp_conf_t *conf, int n, char **fn)
{
	extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
	extern void bcf_call_del_rghash(void *rghash);
	mplp_aux_t **data;
	int i, tid, pos, *n_plp, tid0 = -1, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid = -1, max_depth, max_indel_depth;
	const bam_pileup1_t **plp;
	bam_mplp_t iter;
	bam_header_t *h = 0;
	char *ref;
	void *rghash = 0;

	bcf_callaux_t *bca = 0;
	bcf_callret1_t *bcr = 0;
	bcf_call_t bc;
	bcf_t *bp = 0;
	bcf_hdr_t *bh = 0;

	bam_sample_t *sm = 0;
	kstring_t buf;
	mplp_pileup_t gplp;

	memset(&gplp, 0, sizeof(mplp_pileup_t));
	memset(&buf, 0, sizeof(kstring_t));
	memset(&bc, 0, sizeof(bcf_call_t));
	data = calloc(n, sizeof(void*));
	plp = calloc(n, sizeof(void*));
	n_plp = calloc(n, sizeof(int*));
	sm = bam_smpl_init();

	// read the header and initialize data
	for (i = 0; i < n; ++i) {
		bam_header_t *h_tmp;
		data[i] = calloc(1, sizeof(mplp_aux_t));
		data[i]->fp = strcmp(fn[i], "-") == 0? bam_dopen(fileno(stdin), "r") : bam_open(fn[i], "r");
		data[i]->conf = conf;
		h_tmp = bam_header_read(data[i]->fp);
		data[i]->h = i? h : h_tmp; // for i==0, "h" has not been set yet
		bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text);
		rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list);
		if (conf->reg) {
			int beg, end;
			bam_index_t *idx;
			idx = bam_index_load(fn[i]);
			if (idx == 0) {
				fprintf(stderr, "[%s] fail to load index for %d-th input.\n", __func__, i+1);
				exit(1);
			}
			if (bam_parse_region(h_tmp, conf->reg, &tid, &beg, &end) < 0) {
				fprintf(stderr, "[%s] malformatted region or wrong seqname for %d-th input.\n", __func__, i+1);
				exit(1);
			}
			if (i == 0) tid0 = tid, beg0 = beg, end0 = end;
			data[i]->iter = bam_iter_query(idx, tid, beg, end);
			bam_index_destroy(idx);
		}
		if (i == 0) h = h_tmp;
		else {
			// FIXME: to check consistency
			bam_header_destroy(h_tmp);
		}
	}
	gplp.n = sm->n;
	gplp.n_plp = calloc(sm->n, sizeof(int));
	gplp.m_plp = calloc(sm->n, sizeof(int));
	gplp.plp = calloc(sm->n, sizeof(void*));

	fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
	// write the VCF header
	if (conf->flag & MPLP_GLF) {
		kstring_t s;
		bh = calloc(1, sizeof(bcf_hdr_t));
		s.l = s.m = 0; s.s = 0;
		bp = bcf_open("-", (conf->flag&MPLP_NO_COMP)? "wu" : "w");
		for (i = 0; i < h->n_targets; ++i) {
			kputs(h->target_name[i], &s);
			kputc('\0', &s);
		}
		bh->l_nm = s.l;
		bh->name = malloc(s.l);
		memcpy(bh->name, s.s, s.l);
		s.l = 0;
		for (i = 0; i < sm->n; ++i) {
			kputs(sm->smpl[i], &s); kputc('\0', &s);
		}
		bh->l_smpl = s.l;
		bh->sname = malloc(s.l);
		memcpy(bh->sname, s.s, s.l);
		bh->txt = malloc(strlen(BAM_VERSION) + 64);
		bh->l_txt = 1 + sprintf(bh->txt, "##samtoolsVersion=%s\n", BAM_VERSION);
		free(s.s);
		bcf_hdr_sync(bh);
		bcf_hdr_write(bp, bh);
		bca = bcf_call_init(-1., conf->min_baseQ);
		bcr = calloc(sm->n, sizeof(bcf_callret1_t));
		bca->rghash = rghash;
		bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ;
		bca->min_frac = conf->min_frac;
		bca->min_support = conf->min_support;
	}
	if (tid0 >= 0 && conf->fai) { // region is set
		ref = faidx_fetch_seq(conf->fai, h->target_name[tid0], 0, 0x7fffffff, &ref_len);
		ref_tid = tid0;
		for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid0;
	} else ref_tid = -1, ref = 0;
	iter = bam_mplp_init(n, mplp_func, (void**)data);
	max_depth = conf->max_depth;
	if (max_depth * sm->n > 1<<20)
		fprintf(stderr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
	if (max_depth * sm->n < 8000) {
		max_depth = 8000 / sm->n;
		fprintf(stderr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
	}
	max_indel_depth = conf->max_indel_depth * sm->n;
	bam_mplp_set_maxcnt(iter, max_depth);


	int storeSize = 100;

	int delStore[2][100] = {{0},{0}};

	typedef char * mstring;

	while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
		if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
		if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
		if (tid != ref_tid) {
			free(ref); ref = 0;
			if (conf->fai) ref = faidx_fetch_seq(conf->fai, h->target_name[tid], 0, 0x7fffffff, &ref_len);
			for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid;
			ref_tid = tid;
		}
		if (conf->flag & MPLP_GLF) {
			int total_depth, _ref0, ref16;
			bcf1_t *b = calloc(1, sizeof(bcf1_t));
			for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
			group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
			_ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
			ref16 = bam_nt16_table[_ref0];
			for (i = 0; i < gplp.n; ++i)
				bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
			bcf_call_combine(gplp.n, bcr, ref16, &bc);
			bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
						 (conf->flag&MPLP_FMT_SP), 0, 0);
			bcf_write(bp, bh, b);
			bcf_destroy(b);
			// call indels
			if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) {
				for (i = 0; i < gplp.n; ++i)
					bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
				if (bcf_call_combine(gplp.n, bcr, -1, &bc) >= 0) {
					b = calloc(1, sizeof(bcf1_t));
					bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
								 (conf->flag&MPLP_FMT_SP), bca, ref);
					bcf_write(bp, bh, b);
					bcf_destroy(b);
				}
			}
		} else {
			printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
			for (i = 0; i < n; ++i) {
				int j;
				printf("\t%d\t", n_plp[i]);
				if (n_plp[i] == 0) {
					printf("*\t*"); // FIXME: printf() is very slow...
					if (conf->flag & MPLP_PRINT_POS) printf("\t*");
				} else {
					//MDW start					
					//for each position in the pileup column
					int charLen = 16;
					int countChars[ charLen ][2];
					int countiChars[ charLen ][2];

					int countGap[2]={0,0};

					//double qvTotal=0;
					int numStruck=0;
					int numGood=0;
					int tti;
					int ttj;
					mstring insAllele[100];
					int insAlleleCnt[100];
					int sf=0;
					int flag=0;

					//typedef char * string;
					char insStr0[10000];
					int iCnt0=0;

					char insStr1[10000];
					int iCnt1=0;

					char delStr0[10000];
					int dCnt0=0;

					char delStr1[10000];
					int dCnt1=0;


					float qposP[10000];
					int qposCnt=0;



					//initialize with zeros
						for(tti=0;tti<charLen;tti++){
						  countChars[tti][0]=0;
						  countChars[tti][1]=0;
						}

					// define repeat length here; look back up to 10 prior positions
					// start one position away.
					int replC=0; //
					for(tti=1;tti<=15;tti++){
						// check for greater than zero
						if(toupper(ref[pos-1])==toupper(ref[pos-tti])){
							replC++;
						}else{ // breaks the chain at first non identical to current position not strict homopolymer
							break;
						}
					}					
					int reprC=0; // 
					for(tti=1;tti<=15;tti++){
						// check for greater than zero
						if(toupper(ref[pos+1])==toupper(ref[pos+tti])){
							reprC++;
						}else{ // breaks the chain at first non identical to current position not strict homopolymer
							break;
						}
					}		
					int repT = replC;
					if(replC < reprC){
						repT=reprC;
					}



					for (j = 0; j < n_plp[i]; ++j){
						const bam_pileup1_t *p = plp[i] + j;
									
						/*
						SAME LOGIC AS pileup_seq()
						*/

						if(p->is_refskip){ // never count intron gaps in numStruck
							continue;
						}

						if(p->is_del){ // skip deletion gap, after first position which is the first aligned char
							continue;
						}

						if( 	p->b->core.qual < conf->min_mqToCount  || // mapping quality
							conf->maxrepC < (repT) || // max homopolymer run, this will not 
							(!p->is_del && bam1_qual(p->b)[p->qpos] < conf->min_baseQ) || // base quality for matches
							p->alignedQPosBeg <= (conf->trimEnd ) || p->alignedQPosEnd <= (conf->trimEnd ) ||  // trimEnd is 1-based
							p->zf == 1 || // fusion tag
							p->ih > conf->maxIH  || // max hit index
							(p->nmd > conf->maxNM) || // max mismatch
							(conf->flagFilter == 1 && !(p->b->core.flag&BAM_FPROPER_PAIR)) || // optionally keep only proper pairs
							(conf->flagFilter == 2 && p->b->core.flag&BAM_FSECONDARY) || // optionally strike secondary
							(conf->flagFilter == 3 && p->b->core.flag&BAM_FDUP) || // optionally strike dup
							(conf->flagFilter == 4 && (p->b->core.flag&BAM_FDUP || p->b->core.flag&BAM_FSECONDARY))  || // optionally strike secondary or dup
							(conf->flagFilter == 5 && (p->b->core.flag&BAM_FDUP || p->b->core.flag&BAM_FSECONDARY || p->b->core.flag&BAM_FQCFAIL || !(p->b->core.flag&BAM_FPROPER_PAIR) ))   // optionally strike secondary, dup and QCfail


						){
							numStruck++;
							continue;
						}

						
						//printf("repT=%d: %d %c %c %c %c \n",repT,p->indel,ref[pos],ref[pos-1],ref[pos-2],ref[pos-3]);


						if(!p->is_del && p->indel==0){
  						  countChars[ bam1_seqi(bam1_seq(p->b), p->qpos) ][ bam1_strand(p->b) ] ++;
						  numGood++;			

						}else if(p->is_refskip){
						  countGap[ bam1_strand(p->b) ]++;
						}
						
						if(p->indel<0){
    						  numGood++;			
						  if(bam1_strand(p->b) ==0){
							  for(tti=1;tti<= -p->indel; tti++) {
							    // current spot, starting at 0 in store, because indel<0 refers to next position
							   delStr0[dCnt0] =  ref[pos+tti];
							   dCnt0++;
							  }	
							  delStr0[dCnt0] = ',';
							  dCnt0++;
						  }else{
							  for(tti=1;tti<= -p->indel; tti++) {
							    // current spot, starting at 0 in store, because indel<0 refers to next position
							   delStr1[dCnt1] = ref[pos+tti];
							   dCnt1++;
							  }	
							  delStr1[dCnt1] = ',';
							  dCnt1++;
						  }



						}else if(p->indel>0){
						  numGood++;			

						  if(bam1_strand(p->b) ==0){
							  for(tti=1;tti<= p->indel; tti++) {
							    // current spot, starting at 0 in store, because indel<0 refers to next position
							   insStr0[iCnt0] = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + tti)];
							   iCnt0++;
							  }	
							  insStr0[iCnt0] = ',';
							  iCnt0++;
						  }else{
							  for(tti=1;tti<= p->indel; tti++) {
							    // current spot, starting at 0 in store, because indel<0 refers to next position
							   insStr1[iCnt1] = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + tti)];
							   iCnt1++;
							  }	
							  insStr1[iCnt1] = ',';
							  iCnt1++;
						  }


						}
						//calculate position of variant within aligned read - no soft clips
						if( toupper(ref[pos]) != toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]) || p->indel>0 || p->indel<0  ){

						//distance to end; calculate distance to end of aligned read.  removes soft clips.
						int distToEnd = (p->alignedQPosBeg < p->alignedQPosEnd) ? p->alignedQPosBeg : p->alignedQPosEnd;
						qposP[qposCnt] = distToEnd;						  
						qposCnt++;	
						// printf("id=%s, pos=%d",bam1_qname(p->b),distToEnd);
						}	
					}

					//

					//print A,C,G,T, by +/-
				        printf("\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d", 	countChars[1][0],countChars[1][1],
											countChars[2][0],countChars[2][1],
											countChars[4][0],countChars[4][1],
											countChars[8][0],countChars[8][1],
											countChars[7][0],countChars[7][1]);
					
					putchar('\t');
					for(tti=0;tti<dCnt0;tti++){
					  putchar(delStr0[tti]);
					}

					putchar('\t');
					for(tti=0;tti<dCnt1;tti++){
					  putchar(delStr1[tti]);
					}

					putchar('\t');
					for(tti=0;tti<iCnt0;tti++){
					  putchar(insStr0[tti]);
					}

					putchar('\t');
					for(tti=0;tti<iCnt1;tti++){
					  putchar(insStr1[tti]);
					}

					printf("\t%d\t%d",numGood,numStruck);					

					// get non-ref qpos variation

					float medqpos = -1;
					float medAbsDev = -1;
					if(qposCnt>0){
					  medqpos = median(qposCnt,qposP);
					  float absDev[qposCnt];
					  for(tti=0;tti<qposCnt;tti++){
						absDev[tti] = abs(medqpos - qposP[tti]);
					  }
					  medAbsDev = median(qposCnt-1,absDev);
					}
					printf("\t%f",medAbsDev);

					///END MDW
				}



			}
			putchar('\n');
		}
	}

	bcf_close(bp);
	bam_smpl_destroy(sm); free(buf.s);
	for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]);
	free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp);
	bcf_call_del_rghash(rghash);
	bcf_hdr_destroy(bh); bcf_call_destroy(bca); free(bc.PL); free(bcr);
	bam_mplp_destroy(iter);
	bam_header_destroy(h);
	for (i = 0; i < n; ++i) {
		bam_close(data[i]->fp);
		if (data[i]->iter) bam_iter_destroy(data[i]->iter);
		free(data[i]);
	}
	free(data); free(plp); free(ref); free(n_plp);
	return 0;
}
Exemple #23
0
/* Save the current first strand character in a new "OS" aux tag and replace it by new_strand.
 *
 * bsstrand points into b->data. bam_aux_append() may reallocate b->data, so the old value is
 * copied to a local and the in-place write happens BEFORE the append; bsstrand must not be
 * used by the caller afterwards. */
static void bsstrand_record_and_set(bam1_t *b, unsigned char *bsstrand, char new_strand)
{
	uint8_t old_strand = bsstrand[0];
	bsstrand[0] = new_strand;
	bam_aux_append(b, "OS", 'A', 1, &old_strand);
}

/*
 * Per-read callback: count C>T and G>A differences between read and reference over the
 * aligned (M) segments and use them to check, correct or infer the "ZS" strand tag.
 *
 * b     the alignment; its aux data may be modified
 * in    input file (used for target names)
 * out   output file, or NULL to write nothing
 * data  a bsstrand_data_t* carrying configuration, reference cache and counters
 *
 * Unmapped reads are passed through unchanged and counted in n_unmapped.
 * For reads with a ZS tag:
 *   - both counts > 1 and similarity > 0.5: first strand char set to '?' (n_fail)
 *   - tag starts with '+' but G>A exceeds C>T by more than 2: set to '-' (n_corr)
 *   - tag starts with '-' but C>T exceeds G>A by more than 2: set to '+' (n_corr)
 *   In each of these cases the previous character is preserved in an "OS" tag.
 * For reads without a ZS tag, one is inferred when conf->infer_bsstrand is set.
 * Aborts on CIGAR operations other than M, I, D and S. Always returns 0.
 */
int bsstrand_func(bam1_t *b, const samfile_t *in, samfile_t *out, void *data) {

	bsstrand_data_t *d = (bsstrand_data_t*)data;
	bsstrand_conf_t *conf = d->conf;
	const bam1_core_t *c = &b->core;

	if (c->flag & BAM_FUNMAP){
		if (out) samwrite(out, b);
		d->n_unmapped++;
		return 0;
	}
	
	fetch_refseq(d->rs, in->header->target_name[c->tid], c->pos, c->pos+1);
	uint32_t rpos=c->pos+1, qpos=0;
	int i, nC2T = 0, nG2A = 0;
	uint32_t j;
	char rbase, qbase;

	// walk the CIGAR, comparing read and reference base by base inside match segments
	for (i=0; i<c->n_cigar; ++i) {
		uint32_t op = bam_cigar_op(bam1_cigar(b)[i]);
		uint32_t oplen = bam_cigar_oplen(bam1_cigar(b)[i]);
		switch(op) {
		case BAM_CMATCH:
			for(j=0; j<oplen; ++j) {
				rbase = toupper(getbase_refseq(d->rs, rpos+j));
				qbase = bscall(bam1_seq(b), qpos+j);
				if (rbase == 'C' && qbase == 'T') nC2T += 1;
				if (rbase == 'G' && qbase == 'A') nG2A += 1;
				/* printf("%c vs %c\n", toupper(rbase), qbase); */
			}
			rpos += oplen;
			qpos += oplen;
			break;
		case BAM_CINS:
			qpos += oplen;
			break;
		case BAM_CDEL:
			rpos += oplen;
			break;
		case BAM_CSOFT_CLIP:
			qpos += oplen;
			break;
		default:
			fprintf(stderr, "Unknown cigar, %u\n", op);
			abort();
		}
	}

	char key[2] = {'Z','S'};
	unsigned char *bsstrand = bam_aux_get(b, key);
	if (bsstrand) {
		bsstrand++; // step over the first byte of the aux value (presumably the type code) to the string itself
		double s = similarity(nG2A, nC2T);
		if (nG2A > 1 && nC2T > 1 && s > 0.5) {
			if (conf->output_read || conf->output_all_read)
				printf("F\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bsstrand_record_and_set(b, bsstrand, '?');
			d->n_fail++;
		} else if (*bsstrand == '+' && nG2A > nC2T + 2) {
			if (conf->output_read || conf->output_all_read)
				printf("W2C\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bsstrand_record_and_set(b, bsstrand, '-');
			d->n_corr++;
		} else if (*bsstrand == '-' && nC2T > nG2A + 2) {
			if (conf->output_read || conf->output_all_read)
				printf("C2W\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bsstrand_record_and_set(b, bsstrand, '+');
			d->n_corr++;
		} else if (conf->output_all_read) {
			printf("N\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
		}
	} else if (!(c->flag & BAM_FUNMAP) && conf->infer_bsstrand) {
		// no ZS tag yet: derive one from the conversion counts and the alignment orientation
		char bss[3];
		if (similarity(nG2A, nC2T) < 0.5) {
			strcpy(bss, "??");
		} else if (nC2T > nG2A) {
			strcpy(bss, c->flag & BAM_FREVERSE ? "+-" : "++");
		} else {
			strcpy(bss, c->flag & BAM_FREVERSE ? "-+" : "--");
		}
		bam_aux_append(b, "ZS", 'Z', 3, (uint8_t*) bss);
	}

	
	if (out) samwrite(out, b);
	d->n_mapped++;

	return 0;
}
Exemple #24
0
/*
 * Read up to n_needed reads from the BAM stream in bs and convert them to bwa_seq_t records.
 *
 * bs->which selects the reads to keep: bit 1 = first mates, bit 2 = second mates,
 * bit 4 = reads flagged as neither. *n receives the number of reads stored.
 * Reads aligned to the reverse strand are turned back to their original orientation.
 * If trim_qual >= 1 the reads are passed through bwa_trim_read().
 * Returns the array of reads, or 0 when no read was stored.
 */
static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual)
{
	bwa_seq_t *seqs, *p;
	int n_seqs = 0, l, i;
	long n_trimmed = 0, n_tot = 0;
	bam1_t *b = bam_init1();
	int res;

	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	for (;;) {
		uint8_t *bases, *quals;
		int is_read1, is_read2, wanted;

#ifdef USE_HTSLIB
		res = sam_read1(bs->fp, bs->h, b);
#else
		res = bam_read1(bs->fp, b);
#endif
		if (res < 0) break;

		// decide whether this record belongs to the requested mate class
		is_read1 = (b->core.flag & BAM_FREAD1) != 0;
		is_read2 = (b->core.flag & BAM_FREAD2) != 0;
		wanted = ((bs->which & 1) && is_read1)
			|| ((bs->which & 2) && is_read2)
			|| ((bs->which & 4) && !is_read1 && !is_read2);
		if (!wanted) continue;

		l = b->core.l_qseq;
		p = &seqs[n_seqs++];
		p->tid = -1; // not assigned to a thread yet
		p->qual = 0;
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;
#ifdef USE_HTSLIB
		bases = bam_get_seq(b);
		quals = bam_get_qual(b);
#else
		bases = bam1_seq(b);
		quals = bam1_qual(b);
#endif
		p->seq = (ubyte_t*)calloc(p->len + 1, 1);
		p->qual = (ubyte_t*)calloc(p->len + 1, 1);
		for (i = 0; i != p->full_len; ++i) {
			int ascii_q = quals[i] + 33;
#ifdef USE_HTSLIB
			p->seq[i] = bam_nt16_nt4_table[(int)bam_seqi(bases, i)];
#else
			p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(bases, i)];
#endif
			p->qual[i] = ascii_q < 126? ascii_q : 126; // cap the printable quality at '~'
		}
		{ // undo the reverse-complement applied to reverse-strand alignments
#ifdef USE_HTSLIB
			int on_reverse = bam_is_rev(b);
#else
			int on_reverse = bam1_strand(b);
#endif
			if (on_reverse) {
				seq_reverse(p->len, p->seq, 1);
				seq_reverse(p->len, p->qual, 0);
			}
		}
		if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
#ifdef USE_HTSLIB
		p->name = strdup((const char*)bam_get_qname(b));
#else
		p->name = strdup((const char*)bam1_qname(b));
#endif
		if (n_seqs == n_needed) break;
	}
	// -1 is the regular end-of-file code; anything lower is a read error
	if (res < -1) err_fatal_simple("Error reading bam file");
	*n = n_seqs;
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
	bam_destroy1(b);
	if (n_seqs == 0) {
		free(seqs);
		return 0;
	}
	return seqs;
}

#define BARCODE_LOW_QUAL 13

/*
 * Read up to n_needed reads from bs and convert them to bwa_seq_t records.
 *
 * mode carries the BWA_MODE_* flags in its low bits and the barcode length in bits 24 and up.
 * BAM input is delegated to bwa_read_bam(); otherwise records come from kseq_read().
 * *n receives the number of reads stored. Returns the array of reads, or 0 when no read was
 * stored or the barcode length exceeds BWA_MAX_BCLEN (in which case *n is left untouched).
 */
bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int trim_qual)
{
	kseq_t *ks = bs->ks;
	bwa_seq_t *reads, *rd;
	int is_comp = mode&BWA_MODE_COMPREAD;
	int is_64 = mode&BWA_MODE_IL13;
	int l_bc = mode>>24;
	int n_reads = 0, len, k;
	long n_trimmed = 0, n_tot = 0;

	if (l_bc > BWA_MAX_BCLEN) {
		fprintf(stderr, "[%s] the maximum barcode length is %d.\n", __func__, BWA_MAX_BCLEN);
		return 0;
	}
	if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); // l_bc has no effect for BAM input

	reads = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	for (;;) {
		len = kseq_read(ks);
		if (len < 0) break;

		if ((mode & BWA_MODE_CFY) && (ks->comment.l != 0)) {
			// drop reads whose comment carries a 'Y' right after the first ':' (Casava filter flag)
			char *colon = index(ks->comment.s, ':');
			if (colon && colon[1] == 'Y') continue;
		}
		if (is_64 && ks->qual.l) {
			// shift the quality offset down by 31
			for (k = 0; k < ks->qual.l; ++k) ks->qual.s[k] -= 31;
		}
		if (ks->seq.l <= l_bc) continue; // nothing would be left after removing the barcode

		rd = &reads[n_reads++];
		if (l_bc) {
			// copy the barcode (lower case where the base quality is low) ...
			for (k = 0; k < l_bc; ++k) {
				int low_qual = ks->qual.l && ks->qual.s[k]-33 < BARCODE_LOW_QUAL;
				if (low_qual) rd->bc[k] = tolower(ks->seq.s[k]);
				else rd->bc[k] = toupper(ks->seq.s[k]);
			}
			rd->bc[k] = 0;
			// ... then shift the remaining sequence and qualities to the front
			for (; k < ks->seq.l; ++k)
				ks->seq.s[k - l_bc] = ks->seq.s[k];
			ks->seq.l -= l_bc;
			ks->seq.s[ks->seq.l] = 0;
			if (ks->qual.l) {
				for (k = l_bc; k < ks->qual.l; ++k)
					ks->qual.s[k - l_bc] = ks->qual.s[k];
				ks->qual.l -= l_bc;
				ks->qual.s[ks->qual.l] = 0;
			}
			len = ks->seq.l;
		} else {
			rd->bc[0] = 0;
		}

		rd->tid = -1; // not assigned to a thread yet
		rd->qual = 0;
		rd->full_len = rd->clip_len = rd->len = len;
		n_tot += rd->full_len;

		rd->seq = (ubyte_t*)calloc(rd->full_len, 1);
		for (k = 0; k != rd->full_len; ++k)
			rd->seq[k] = nst_nt4_table[(int)ks->seq.s[k]];
		if (ks->qual.l) { // keep a private copy of the qualities
			rd->qual = (ubyte_t*)strdup((char*)ks->qual.s);
			if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, rd);
		}
		rd->rseq = (ubyte_t*)calloc(rd->full_len, 1);
		memcpy(rd->rseq, rd->seq, rd->len);
		seq_reverse(rd->len, rd->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(rd->len, rd->rseq, is_comp);

		rd->name = strdup((const char*)ks->name.s);
		{ // strip a trailing "/1" or "/2" mate suffix
			int name_len = strlen(rd->name);
			if (name_len > 2 && rd->name[name_len-2] == '/') {
				char mate = rd->name[name_len-1];
				if (mate == '1' || mate == '2') rd->name[name_len-2] = '\0';
			}
		}
		if (n_reads == n_needed) break;
	}

	*n = n_reads;
	if (n_reads && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
	if (n_reads == 0) {
		free(reads);
		return 0;
	}
	return reads;
}

/* Release an array of n_seqs reads together with every buffer owned by each read
 * (per-hit cigars, name, sequences, qualities, alignments, MD string, cigar). */
void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs)
{
	int idx = 0;
	while (idx != n_seqs) {
		bwa_seq_t *rd = &seqs[idx++];
		int hit;
		// free(NULL) is a no-op, so hits without a cigar need no special casing
		for (hit = 0; hit < rd->n_multi; ++hit)
			free(rd->multi[hit].cigar);
		free(rd->name);
		free(rd->seq);
		free(rd->rseq);
		free(rd->qual);
		free(rd->aln);
		free(rd->md);
		free(rd->multi);
		free(rd->cigar);
	}
	free(seqs);
}
Exemple #25
0
/* Callable wrapper that returns whatever bam1_seq() yields for b
 * (presumably so the accessor can be used where a real function is required - confirm). */
uint8_t *bam1_seq_(bam1_t *b)
{
	uint8_t *packed_seq = bam1_seq(b);
	return packed_seq;
}
Exemple #26
0
/*
 * Walk the graph from node n along the path that matches the read's bases exactly and
 * return the index (into heap->nodes) of the best heap node that reaches the last read base.
 *
 * g, n                 graph and start node
 * b                    the read
 * heap                 work heap; nodes are allocated from it
 * strand               non-zero: walk the read from its last base over node->prev edges
 * colors, color_qualities, space
 *                      colour-space data, used for the heap nodes when space == SRMA_SPACE_CS;
 *                      edge selection always compares nucleotide bases
 * cutoffs, max_total_coverage
 *                      passed to pass_filters1() / pass_filters()
 * use_qualities        forwarded to sw_node_init()
 * max_heap_size        give up once the queue holds at least this many entries
 *
 * "Best" is the highest score, ties broken by the larger coverage sum.
 * Returns -1 if the start node fails the filters, the heap grows too large, a filter
 * reports an error (negative value), or no path covers the whole read.
 */
static int32_t sw_align_bound(graph_t *g, bam1_t *b, node_t *n, sw_heap_t *heap, uint8_t strand, const char *colors, const char *color_qualities, uint8_t space, cov_cutoffs_t *cutoffs, uint8_t use_qualities, int32_t max_total_coverage, int32_t max_heap_size)
{
	int32_t sw_node_i=-1, sw_node_best_i=-1, sw_node_cur_i=-1, sw_node_next_i=-1;
	int32_t i;
	char base, qual;

	if(0 != pass_filters1(g, n, cutoffs, max_total_coverage)) {
		return -1;
	}

	{ // add the start node to the heap
		// Get first base
		if(SRMA_SPACE_CS == space) {
			base = nt2int_table[(int)colors[1]];
			qual = color_qualities[0]; 
		}
		else {
			if(strand) {
				base = nt4bit_to_int[bam1_seqi(bam1_seq(b), b->core.l_qseq-1)];
				qual = bam1_qual(b)[b->core.l_qseq-1] + 33;
			}
			else {
				base = nt4bit_to_int[bam1_seqi(bam1_seq(b), 0)];
				qual = bam1_qual(b)[0] + 33;
			}
		}
		sw_node_i = sw_heap_get_node_i(heap);
		sw_node_init(&heap->nodes[sw_node_i], NULL, n, n->coverage, base, qual, use_qualities, space); 
		sw_heap_add_i(heap, sw_node_i);
	}

	sw_node_cur_i = sw_heap_poll_i(heap);
	assert(0 <= sw_node_cur_i); // DEBUG
	while(0 <= sw_node_cur_i) {
		//fprintf(stderr, "sw_node_cur_i=%d\n", sw_node_cur_i); // DEBUG
		// The original read "max_heap_size <- ...", i.e. '<' followed by unary minus, which
		// compared against a negated value and practically never triggered.
		if(max_heap_size <= heap->queue_end - heap->queue_start + 1) {
			// too many to consider
			//fprintf(stderr, "NOT BOUNDED 2\n"); // DEBUG
			return -1;
		}

		// collapse heap entries that compare equal to the current one, keeping the better of each pair
		sw_node_next_i = sw_heap_peek_i(heap);
		assert(0 <= sw_node_cur_i); // DEBUG
		while(NODE_INSERTION != __node_type(heap->nodes[sw_node_cur_i].node)
				&& 0 <= sw_node_next_i
				&& 0 == sw_node_compare(&heap->nodes[sw_node_cur_i], &heap->nodes[sw_node_next_i], heap->type)) {
			if(heap->nodes[sw_node_cur_i].score < heap->nodes[sw_node_next_i].score ||
					(heap->nodes[sw_node_cur_i].score == heap->nodes[sw_node_next_i].score &&
					 heap->nodes[sw_node_cur_i].coverage_sum < heap->nodes[sw_node_next_i].coverage_sum)) { 
				sw_node_cur_i = sw_heap_poll_i(heap);
			}
			else {
				// ignore the next node
				sw_heap_poll_i(heap);
			}
			sw_node_next_i = sw_heap_peek_i(heap);
		}
		sw_node_next_i = -1;

		// DEBUG
		/*
		   fprintf(stderr, "read_offset=%d l_qseq-1=%d\n",
		   heap->nodes[sw_node_cur_i].read_offset,
		   b->core.l_qseq-1);
		   */
		if(heap->nodes[sw_node_cur_i].read_offset == b->core.l_qseq-1) { // found, keep best
			if(sw_node_best_i < 0 ||
					heap->nodes[sw_node_best_i].score < heap->nodes[sw_node_cur_i].score ||
					(heap->nodes[sw_node_best_i].score == heap->nodes[sw_node_cur_i].score && 
					 heap->nodes[sw_node_best_i].coverage_sum < heap->nodes[sw_node_cur_i].coverage_sum)) {
				sw_node_best_i = sw_node_cur_i;
			}
		}
		else {
			edge_list_t *list = NULL;
			if(1 == strand) { // reverse
				list = heap->nodes[sw_node_cur_i].node->prev;
			}
			else {
				list = heap->nodes[sw_node_cur_i].node->next;
			}
			{ // get the aligned base and quality
				// do not use color space data for bounding
				if(strand) {
					base = nt4bit_to_int[bam1_seqi(bam1_seq(b), b->core.l_qseq-1-heap->nodes[sw_node_cur_i].read_offset-1)];
					qual = bam1_qual(b)[b->core.l_qseq-1-heap->nodes[sw_node_cur_i].read_offset-1] + 33;
				}
				else {
					base = nt4bit_to_int[bam1_seqi(bam1_seq(b), (heap->nodes[sw_node_cur_i].read_offset+1))];
					qual = bam1_qual(b)[(heap->nodes[sw_node_cur_i].read_offset+1)] + 33;
				}
			}
			//fprintf(stderr, "list->length=%d\n", list->length); // DEBUG
			for(i=0;i<list->length;i++) {
				node_t *node_cur= list->nodes[i];
				// DEBUG
				/*
				   fprintf(stderr, "%d:%d __node_base(node_cur)=%d base=%d __node_type(node_cur)=%d coverages_cur=%d\n",
				   node_cur->contig, node_cur->position,
				   __node_base(node_cur), base, __node_type(node_cur), list->coverages[i]);
				   */
				// base should match unless filters don't pass
				if(__node_base(node_cur) == base) {
					uint16_t coverage_cur = list->coverages[i];
					int32_t pass = pass_filters(g, node_cur, coverage_cur, cutoffs, max_total_coverage);
					//fprintf(stderr, "pass=%d\n", pass); // DEBUG
					if(0 == pass) {
						// Keep 'base' untouched: it is still needed to compare against the
						// remaining edges of this list. The heap node gets its own copy,
						// replaced by colour-space data where applicable.
						char node_base = base, node_qual = qual;
						if(SRMA_SPACE_CS == space) { // use color space data
							node_base = nt2int_table[(int)colors[1 + (heap->nodes[sw_node_cur_i].read_offset+1)]];
							node_qual = color_qualities[heap->nodes[sw_node_cur_i].read_offset+1]; 
						}
						// add to the heap
						sw_node_i = sw_heap_get_node_i(heap);
						// DEBUG
						assert(0 <= sw_node_cur_i);
						assert(0 <= heap->nodes[sw_node_cur_i].read_offset);
						sw_node_init(&heap->nodes[sw_node_i], &heap->nodes[sw_node_cur_i], node_cur, coverage_cur, node_base, node_qual, use_qualities, space); 
						sw_heap_add_i(heap, sw_node_i);
					}
					else if(pass < 0) {
						//fprintf(stderr, "NOT BOUNDED 3\n"); // DEBUG
						return -1;
					}
				}
			}
		}
		// get the next node
		sw_node_cur_i = sw_heap_poll_i(heap);
	}

	//fprintf(stderr, "BOUNDED %d\n", sw_node_best_i); // DEBUG
	return sw_node_best_i;
}
Exemple #27
0
/* Count matches (OP_MATCH), mismatches (OP_MISMATCH), insertions
 * (OP_INS) and deletions (OP_DEL) for an aligned read. Written to
 * (preallocated, size NUM_OP_CATS) counts at the indices given above.
 * Will ignore all mis-/match bases if their bq is below min_bq.
 *
 * Returns the total number of operations counted (excl. clipped bases
 * or those with bq<min_bq) or -1 if ref or counts is NULL. Each indel
 * CIGAR operation is counted as one operation regardless of its
 * length, using INDEL_QUAL_DEFAULT, which is suboptimal. 0 is a valid
 * return value, e.g. if all bases are below the quality threshold.
 *
 * If quals is not NULL it will be used as a two dim array (has to be
 * preallocated) with OPs as first dim (len NUM_OP_CATS) and the
 * qualities of the bases as second dim. NOTE/FIXME: this uses bq for
 * mis/matches and INDEL_QUAL_DEFAULT for now in case of indels. The
 * number of elements corresponds to the count entry and can be at max
 * readlen.
 *
 * If target is non-NULL, mismatches and indels at positions reported
 * by var_in_ign_list() for that chromosome are skipped. An ignored
 * insertion still advances the query position and an ignored deletion
 * still advances the reference position.
 *
 * ref is indexed by absolute reference position and must cover the
 * whole alignment.
 *
 * WARNING code duplication with calc_read_alnerrprof but merging the
 * two functions was too complicated (and the latter is unused anyway)
 */
int
count_cigar_ops(int *counts, int **quals, const bam1_t *b,
                const char *ref, int min_bq, char *target)
{
#if 0
#define TRACE 1
#endif
     int num_ops = 0;
     /* modelled after bam.c:bam_calend(), bam_format1_core() and
      * pysam's aligned_pairs (./pysam/csamtools.pyx)
      */
     uint32_t *cigar = bam1_cigar(b);
     const bam1_core_t *c = &b->core;
     uint32_t tpos = c->pos; /* pos on genome */
     uint32_t qpos = 0; /* pos on read/query */
     uint32_t k, i;
#if 0
     int32_t qlen = (int32_t) bam_cigar2qlen(c, cigar); /* read length */
#else
     int qlen = b->core.l_qseq; /* read length */
#endif

     if (! ref) {
          return -1;
     }
     if (! counts) {
          return -1;
     }

     memset(counts, 0, NUM_OP_CATS*sizeof(int));

     /* loop over cigar to get aligned bases
      *
      * read: bam_format1_core(NULL, b, BAM_OFDEC);
      */
     for (k=0; k < c->n_cigar; ++k) { /* n_cigar: number of cigar operations */
          int op = cigar[k] & BAM_CIGAR_MASK; /* the cigar operation */
          uint32_t l = cigar[k] >> BAM_CIGAR_SHIFT;

          /* following conditionals could be collapsed to much shorter
           * code, but we keep them roughly as they were in pysam's
           * aligned_pairs to make later comparison and handling of
           * indels easier
           */
          if (op == BAM_CMATCH || op == BAM_CDIFF) {
               for (i=tpos; i<tpos+l; i++) {                             
                    int actual_op;
                    assert(qpos < qlen);
                    char ref_nt = ref[i];
                    char read_nt = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), qpos)];
                    int bq = bam1_qual(b)[qpos];

                    if (ref_nt != read_nt || op == BAM_CDIFF) {
                         actual_op = OP_MISMATCH;
                    } else {
                         actual_op = OP_MATCH;
                    }

                    /* ignoring base if below min_bq, independent of type */
                    if (bq<min_bq) {
#ifdef TRACE
                         fprintf(stderr, "TRACE(%s): [M]MATCH ignoring base because of bq=%d at %d (qpos %d)\n", bam1_qname(b), bq, i, qpos);
#endif
                         qpos += 1;
                         continue;
                    }

                    /* for mismatches only */
                    if (target && actual_op == OP_MISMATCH) {
                         var_t fake_var;
                         memset(&fake_var, 0, sizeof(var_t));
                         fake_var.chrom = target;
                         fake_var.pos = i;
                         /* FIXME evil, evil hack. only works as long as var_in_ign_list only uses chrom and pos */
                         if (var_in_ign_list(&fake_var)) {

#ifdef TRACE
                              fprintf(stderr, "TRACE(%s): MM: ignoring because in ign list at %d (qpos %d)\n", bam1_qname(b), i, qpos);
#endif
                              qpos += 1;
                              continue;
                         } 
                    }

#ifdef TRACE
                    fprintf(stderr, "TRACE(%s): adding [M]MATCH qpos,tpos,ref,read,bq = %d,%d,%c,%c,%d\n", bam1_qname(b), qpos, tpos, ref_nt, read_nt, bq);
#endif                    
                    counts[actual_op] += 1;
                    if (quals) {
                         quals[actual_op][counts[actual_op]-1] = bq;
                    }

                    qpos += 1;
               }
               tpos += l;

          } else if (op == BAM_CINS || op == BAM_CDEL) {

               if (target) {
                    /* vcf: 
                     * indel at tpos 1 means, that qpos 2 is an insertion  (e.g. A to AT)
                     * del at tpos 1 means, that qpos 2 is missing (e.g. AT to A)
                     */
                    var_t fake_var;
                    /* zero all fields, as done for mismatches above, so
                     * var_in_ign_list never sees indeterminate members */
                    memset(&fake_var, 0, sizeof(var_t));
                    fake_var.chrom = target;
                    fake_var.pos = tpos;
                    if (op==BAM_CINS) {
                         fake_var.pos -= 1;
                    }
                    /* FIXME see above: only works as long as var_in_ign_list only uses chrom and pos */
                    if (var_in_ign_list(&fake_var)) {
#ifdef TRACE
                         fprintf(stderr, "TRACE(%s): %c: ignoring because in ign list at tpos %d (qpos %d)\n", bam1_qname(b), op == BAM_CINS? 'I':'D', tpos, qpos);
#endif
                         /* the operation is not counted, but the coordinates
                          * still have to move past it: insertions consume
                          * the read, deletions consume the reference */
                         if (op == BAM_CINS) {
                              qpos += l;
                         } else {
                              tpos += l;
                         }
                         continue;
                    }
               }

#ifdef TRACE
               fprintf(stderr, "TRACE(%s): adding %c qpos,tpos = %d,%d\n", bam1_qname(b), op==BAM_CINS?'I':'D', qpos, tpos);
#endif                    

               if (op == BAM_CINS) {
                    counts[OP_INS] += 1; /* counts indel as 1 operation only */
                    if (quals) {
                         quals[OP_INS][counts[OP_INS]-1] = INDEL_QUAL_DEFAULT; /* FIXME use iq */
                    }
                    qpos += l;/* forward query pos by length of operation */

               } else if (op == BAM_CDEL) {
                    counts[OP_DEL] += 1; /* counts indel as 1 operation only */
                    if (quals) {
                         quals[OP_DEL][counts[OP_DEL]-1] = INDEL_QUAL_DEFAULT; /* FIXME use dq */
                    }
                    tpos += l; /* forward genome pos by length of operation */

               } else {
                    LOG_FATAL("%s\n", "INTERNAL ERROR: should never get here");
                    exit(1);
               }

          } else if (op == BAM_CREF_SKIP) {
               tpos += l;

          } else if (op == BAM_CSOFT_CLIP) {
#if 0
               printf("SOFT CLIP qpos = %d\n", qpos);
#endif
               qpos += l;

          } else if (op != BAM_CHARD_CLIP) {
               LOG_WARN("Untested op %d in cigar %s\n", op, cigar_str_from_bam(b));
               /* don't think we need to do anything here */
          }
     } /* for k */

     /* bam_calend() yields the end coordinate on the reference, so it has
      * to be compared with the genome position, not the query position */
     assert(tpos == bam_calend(&b->core, bam1_cigar(b))); /* FIXME correct assert? what if hard clipped? */
     if (qpos != qlen) {
          LOG_WARN("got qpos=%d and qlen=%d for cigar %s l_qseq %d in read %s\n", qpos, qlen, cigar_str_from_bam(b), b->core.l_qseq, bam1_qname(b));
     }
     assert(qpos == qlen);

     num_ops = 0;
     for (i=0; i<NUM_OP_CATS; i++) {
          num_ops += counts[i];
#ifdef TRACE
          int j;
          if (quals) {
               for (j=0; j<counts[i]; j++) {
                    fprintf(stderr, "TRACE(%s) op %s #%d: %d\n", bam1_qname(b), op_cat_str[i], j, quals[i][j]);
               }
          }
#endif
     }
     return num_ops;
}
Exemple #28
0
/* Render one BAM record as a single tab-separated SAM-style text line.
 *
 * header: optional. When non-NULL, tid/mtid are printed using
 *         header->target_name[]; when NULL the numeric ids are printed.
 * b:      the alignment record to format.
 * of:     how to print the flag field: BAM_OFDEC (decimal), BAM_OFHEX
 *         (hexadecimal); any other value prints one character per set
 *         flag bit, taken from bam_flag2char_table.
 *
 * Returns the character buffer of the internal kstring_t. This function
 * never frees it, so the caller presumably owns it (NOTE(review): confirm
 * against callers).
 */
char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
{
	/* CIGAR op codes in BAM numbering. The op is masked with
	 * BAM_CIGAR_MASK, so it can exceed the ops listed here; anything
	 * outside the table is printed as '?' instead of indexing past the
	 * end of the string. */
	static const char cigar_ops[] = "MIDNSHP=XB";
	uint8_t *s = bam1_seq(b), *t = bam1_qual(b);
	int i;
	const bam1_core_t *c = &b->core;
	kstring_t str;
	str.l = str.m = 0; str.s = 0;

	/* QNAME (l_qname includes the trailing NUL, hence -1) */
	kputsn(bam1_qname(b), c->l_qname-1, &str); kputc('\t', &str);
	/* FLAG */
	if (of == BAM_OFDEC) { kputw(c->flag, &str); kputc('\t', &str); }
	else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag);
	else { // BAM_OFSTR
		for (i = 0; i < 16; ++i)
			if ((c->flag & 1<<i) && bam_flag2char_table[i])
				kputc(bam_flag2char_table[i], &str);
		kputc('\t', &str);
	}
	/* RNAME */
	if (c->tid < 0) kputsn("*\t", 2, &str);
	else {
		if (header) kputs(header->target_name[c->tid] , &str);
		else kputw(c->tid, &str);
		kputc('\t', &str);
	}
	/* POS (printed one-based) and MAPQ */
	kputw(c->pos + 1, &str); kputc('\t', &str); kputw(c->qual, &str); kputc('\t', &str);
	/* CIGAR */
	if (c->n_cigar == 0) kputc('*', &str);
	else {
		for (i = 0; i < c->n_cigar; ++i) {
			uint32_t op = bam1_cigar(b)[i] & BAM_CIGAR_MASK;
			kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);
			kputc(op < sizeof(cigar_ops) - 1 ? cigar_ops[op] : '?', &str);
		}
	}
	kputc('\t', &str);
	/* mate reference: '*' if unset, '=' if same as this read's reference */
	if (c->mtid < 0) kputsn("*\t", 2, &str);
	else if (c->mtid == c->tid) kputsn("=\t", 2, &str);
	else {
		if (header) kputs(header->target_name[c->mtid], &str);
		else kputw(c->mtid, &str);
		kputc('\t', &str);
	}
	/* mate position (one-based) and insert size */
	kputw(c->mpos + 1, &str); kputc('\t', &str); kputw(c->isize, &str); kputc('\t', &str);
	/* SEQ and QUAL; a first quality byte of 0xff means "no qualities" */
	if (c->l_qseq) {
		for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
		kputc('\t', &str);
		if (t[0] == 0xff) kputc('*', &str);
		else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
	} else kputsn("*\t*", 3, &str);
	/* auxiliary fields: two-byte key, one-byte type, then the payload */
	s = bam1_aux(b);
	while (s < b->data + b->data_len) {
		uint8_t type, key[2];
		key[0] = s[0]; key[1] = s[1];
		s += 2; type = *s; ++s;
		kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str);
		if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; }
		else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; }
		else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; }
		else if (type == 'S') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; }
		else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; }
		else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; }
		else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; }
		else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
		else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
		else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; }
		else if (type == 'B') {
			/* typed array: sub-type byte, 32-bit element count, elements */
			uint8_t sub_type = *(s++);
			int32_t n;
			memcpy(&n, s, 4);
			s += 4; // now points to the start of the array
			kputc(type, &str); kputc(':', &str); kputc(sub_type, &str); // write the typing
			for (i = 0; i < n; ++i) {
				kputc(',', &str);
				if ('c' == sub_type) { kputw(*(int8_t*)s, &str); ++s; }
				else if ('C' == sub_type) { kputw(*(uint8_t*)s, &str); ++s; }
				else if ('s' == sub_type) { kputw(*(int16_t*)s, &str); s += 2; }
				else if ('S' == sub_type) { kputw(*(uint16_t*)s, &str); s += 2; }
				else if ('i' == sub_type) { kputw(*(int32_t*)s, &str); s += 4; }
				else if ('I' == sub_type) { kputuw(*(uint32_t*)s, &str); s += 4; }
				else if ('f' == sub_type) { ksprintf(&str, "%g", *(float*)s); s += 4; }
			}
		}
	}
	return str.s;
}
Exemple #29
0
/* Accumulates, per read position, a non-match score and a usage count
 * for one aligned read. Positions are indexed in the read's original
 * orientation, i.e. for reverse-strand reads index qlen-qpos-1 is used
 * instead of qpos.
 *
 * For every aligned base whose reference base is not 'N', used_pos is
 * incremented; if the base mismatches the reference (or the cigar op is
 * BAM_CDIFF), (1.0 - PHREDQUAL_TO_PROB(bq)) is added to alnerrprof.
 * Inserted bases and deleted reference bases add
 * (1.0 - PHREDQUAL_TO_PROB(INDEL_QUAL_DEFAULT)) and increment used_pos;
 * deletions are attributed to the read position following the deletion.
 *
 * alnerrprof and used_pos must be of at least length b->core.l_qseq.
 * Note: will add to alnerrprof and used_pos, i.e. arrays should be
 * initialized to 0 if you don't want aggregate values.
 *
 * ref is indexed directly with the genomic position, so it must cover
 * the whole aligned span of the read.
 *
 * WARNING code duplication with count_cigar_ops but merging the two
 * functions is messy.
 */
void
calc_read_alnerrprof(double *alnerrprof, unsigned long int *used_pos, 
                   const bam1_t *b, const char *ref)
{
     /* modelled after bam.c:bam_calend(), bam_format1_core() and
      * pysam's aligned_pairs (./pysam/csamtools.pyx)
      */
     uint32_t *cigar = bam1_cigar(b);
     uint32_t k, i;
     const bam1_core_t *c = &b->core;
#if 0
     int32_t qlen = (int32_t) bam_cigar2qlen(c, cigar); /* read length */
#else
     int qlen = b->core.l_qseq; /* read length */
#endif
     uint32_t pos = c->pos; /* pos on genome */
     uint32_t qpos = 0; /* pos on read/query */
     uint32_t qpos_org = bam1_strand(b) ? qlen-qpos-1 : qpos;/* original qpos before mapping as possible reverse */


     /* loop over cigar to get aligned bases
      *
      * read: bam_format1_core(NULL, b, BAM_OFDEC);
      */
     for (k=0; k < c->n_cigar; ++k) { /* n_cigar: number of cigar operations */
          int op = cigar[k] & BAM_CIGAR_MASK; /* the cigar operation */
          uint32_t l = cigar[k] >> BAM_CIGAR_SHIFT;

          /* following conditionals could be collapsed to much shorter
           * code, but we keep them as they were in pysam's
           * aligned_pairs to make later handling of indels easier
           */
          if (op == BAM_CMATCH || op == BAM_CDIFF) {
               for (i=pos; i<pos+l; i++) {                             
                    assert(qpos < qlen);
                    /* case agnostic */
                    char ref_nt = ref[i];
                    char read_nt = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), qpos)];
                    int bq = bam1_qual(b)[qpos];
#if 0
                    printf("[M]MATCH qpos,i,ref,read = %d,%d,%c,%c\n", qpos, i, ref_nt, read_nt);
#endif                    

                    if (ref_nt != 'N') {
                         if (ref_nt != read_nt || op == BAM_CDIFF) {
                              alnerrprof[qpos_org] += (1.0 - PHREDQUAL_TO_PROB(bq));
                         } /* otherwise leave at 0.0 but count anyway */
                         used_pos[qpos_org] += 1;
                    }
                    qpos += 1;
                    qpos_org = bam1_strand(b) ? qlen-qpos-1 : qpos;
               }
               pos += l;

          } else if (op == BAM_CINS) {
               for (i=pos; i<pos+l; i++) {
                    assert(qpos < qlen);
                    
                    /* index by original orientation, same as for
                     * (mis)matches, so that forward and reverse reads
                     * land in the same profile coordinates */
                    alnerrprof[qpos_org] += (1.0 - PHREDQUAL_TO_PROB(INDEL_QUAL_DEFAULT));
                    used_pos[qpos_org] += 1;
#if 0
                    printf("INS qpos,i = %d,None\n", qpos);
#endif
                    qpos += 1;
                    qpos_org = bam1_strand(b) ? qlen-qpos-1 : qpos;
               }
               
          } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) {
               for (i=pos; i<pos+l; i++) {
#if 0
                    printf("DEL qpos,i = None,%d\n", i);
#endif

                    /* A deletion has no read base of its own; it is
                     * charged to the read position that follows it. If
                     * no read base follows (qpos == qlen) there is no
                     * valid slot and qpos_org has wrapped, so skip
                     * rather than write out of bounds. */
                    if (op == BAM_CDEL && qpos < (uint32_t)qlen) {
                         alnerrprof[qpos_org] += (1.0 - PHREDQUAL_TO_PROB(INDEL_QUAL_DEFAULT));
                         used_pos[qpos_org] += 1;
                    }
               }
               pos += l;
               /* deletion: don't increase qpos */

          } else if (op == BAM_CSOFT_CLIP) {
#if 0
               printf("SOFT CLIP qpos = %d\n", qpos);
#endif
               qpos += l;
               qpos_org = bam1_strand(b) ? qlen-qpos-1 : qpos;

          } else if (op != BAM_CHARD_CLIP) {
               LOG_WARN("Unknown op %d in cigar %s\n", op, cigar_str_from_bam(b));

          }
     } /* for k */
     assert(pos == bam_calend(&b->core, bam1_cigar(b))); /* FIXME correct assert? what if hard clipped? */
     if (qpos != qlen) {
          LOG_FIXME("got qpos=%d and qlen=%d for cigar %s l_qseq %d\n", qpos, qlen, cigar_str_from_bam(b), b->core.l_qseq);
     }
     assert(qpos == qlen); /* FIXME correct assert? What if hard clipped? */

#if 0
     fprintf(stderr, "%s:", __FUNCTION__);
     for (i=0; i< b->core.l_qseq; i++) {
          fprintf(stderr, " %g/%d", alnerrprof[i], used_pos[i]);
     }
     fprintf(stderr, "\n");
#endif
}
Exemple #30
0
// Absolute distance between two node positions.  Both operands are widened
// to int64_t before subtracting, so the result is correct whether the
// position field is a signed or an unsigned 32-bit integer.
static int64_t sw_align_position_gap(int64_t pos_a, int64_t pos_b)
{
	return (pos_a < pos_b) ? (pos_b - pos_a) : (pos_a - pos_b);
}

// TODO soft clipping
//
// Builds a new BAM record from the best alignment found in the heap.
//
// If sw_node_best_i is negative no alignment was found and bam_old is
// returned untouched.  Otherwise a fresh record is allocated and filled:
//   - query name, flag and mapping quality are copied from bam_old;
//   - tid/pos come from the best heap node (converted to zero-based), the
//     mate fields are cleared;
//   - the cigar and the packed sequence are rebuilt by walking the heap
//     from sw_node_best_i along the prev_i links.  A jump of more than one
//     in node position between two consecutive nodes is emitted as a
//     deletion whose length is the size of the jump minus one;
//   - qualities are recomputed from the color qualities (color space),
//     penalised where bases changed (correct_bases), or copied;
//   - saved aux tags are copied, and AS, XC (plus XO/XQ when correct_bases
//     is set and XE in color space) are appended.
// bam_old is destroyed and the new record is returned.
//
// rg_id is not referenced directly in this function.
bam1_t *sw_align_update_bam(bam1_t *bam_old, char *rg_id, sw_heap_t *heap, int32_t sw_node_best_i, uint8_t space, char *colors, char *color_qualities, uint8_t strand, uint8_t correct_bases)
{
	bam1_t *bam_new=NULL;
	int32_t sw_node_cur_i=-1, sw_node_prev_i=-1;
	int32_t i;
	int32_t cigar_cur_op, cigar_prev_op;
	int32_t cigar_cur_length, cigar_prev_length;
	uint32_t read_index;
	char *color_errors = NULL;

	if(sw_node_best_i < 0) { // none found, do not modify alignment
		return bam_old;
	}

	bam_new = srma_calloc(1, sizeof(bam1_t), __func__, "bam_new");

	// the heap is walked from the best node backwards; depending on the
	// strand this visits the read left-to-right or right-to-left
	if(1 == strand) {
		read_index = 0;
	}
	else {
		read_index = bam_old->core.l_qseq-1;
	}

	{ // query name
		bam_new->core.l_qname = bam_old->core.l_qname;
		bam_new->data_len += bam_new->core.l_qname;
		sw_align_bam_alloc_data(bam_new, bam_new->data_len);
		memcpy(bam1_qname(bam_new), bam1_qname(bam_old), bam_old->core.l_qname);
	}
	{ // flag
		bam_new->core.flag = bam_old->core.flag;
	}
	{ // tid, pos, qual
		bam_new->core.tid = heap->nodes[sw_node_best_i].node->contig-1; // it is one-based, we want zero-based
		if(1 == strand) { // reverse strand
			bam_new->core.pos = heap->nodes[sw_node_best_i].node->position-1;
		}
		else {
			bam_new->core.pos = heap->nodes[sw_node_best_i].start_position-1; // zero-based
		}
		bam_new->core.qual = bam_old->core.qual; // should we change the mapping quality?
		bam_new->core.mtid = -1;
		bam_new->core.mpos = -1;
		bam_new->core.isize = 0;
	}
	{ // cigar length
		bam_new->core.n_cigar = 0;
		cigar_cur_op = cigar_prev_op = -1;
		sw_node_cur_i = sw_node_best_i;
		while(0 <= sw_node_cur_i) {
			// A gap in positions between the previous and the current node
			// is a deletion.  It is emitted once (the node is not advanced
			// in that iteration), hence the check that the previous
			// operator was not already the deletion for this gap.
			if(0 <= sw_node_prev_i && BAM_CDEL != cigar_prev_op && 1 < sw_align_position_gap(heap->nodes[sw_node_cur_i].node->position, heap->nodes[sw_node_prev_i].node->position)) {
				cigar_cur_op = BAM_CDEL;
			}
			else {
				switch(__node_type(heap->nodes[sw_node_cur_i].node)) {
					case NODE_MATCH:
					case NODE_MISMATCH:
						cigar_cur_op = BAM_CMATCH;
						break;
					case NODE_INSERTION:
						cigar_cur_op = BAM_CINS;
						break;
					default:
						srma_error(__func__, "unknown node type", Exit, OutOfRange);
				}
			}
			if(cigar_prev_op != cigar_cur_op) {
				// update the previous cigar operator
				cigar_prev_op = cigar_cur_op;
				bam_new->core.n_cigar++;
			}
			// Update
			if(BAM_CDEL != cigar_cur_op) {
				sw_node_prev_i = sw_node_cur_i;
				sw_node_cur_i = heap->nodes[sw_node_cur_i].prev_i;
			}
		}
	}

	{ // cigar and seq
		uint32_t *cigar_ptr=NULL;
		uint8_t *seq_ptr=NULL;
		uint32_t cigar_i = 0;
		// cigar
		bam_new->data_len += bam_new->core.n_cigar*sizeof(uint32_t);
		sw_align_bam_alloc_data(bam_new, bam_new->data_len);
		cigar_ptr = bam1_cigar(bam_new);
		// seq
		bam_new->core.l_qseq = bam_old->core.l_qseq;
		bam_new->data_len += (bam_new->core.l_qseq + 1)/2;
		sw_align_bam_alloc_data(bam_new, bam_new->data_len);
		seq_ptr = bam1_seq(bam_new);
		// fill in cigar and seq
		cigar_i = (1 == strand) ? bam_new->core.n_cigar-1 : 0;
		cigar_cur_op = cigar_prev_op = -1;
		cigar_cur_length = cigar_prev_length = -1;
		sw_node_cur_i = sw_node_best_i;
		// restart the walk: the previous-node index left over from the
		// counting pass must not be compared against the best node, or a
		// deletion that was never counted would be written
		sw_node_prev_i = -1;
		while(0 <= sw_node_cur_i) {
			if(0 <= sw_node_prev_i && BAM_CDEL != cigar_prev_op && 1 < sw_align_position_gap(heap->nodes[sw_node_cur_i].node->position, heap->nodes[sw_node_prev_i].node->position)) {
				cigar_cur_op = BAM_CDEL;
			}
			else {
				switch(__node_type(heap->nodes[sw_node_cur_i].node)) {
					case NODE_MATCH:
					case NODE_MISMATCH:
						cigar_cur_op = BAM_CMATCH;
						break;
					case NODE_INSERTION:
						cigar_cur_op = BAM_CINS;
						break;
					default:
						srma_error(__func__, "unknown node type", Exit, OutOfRange);
				}
				// pack sequence (two bases per byte); clear a byte the first
				// time it is touched in the current walking direction
				if(1 == strand && 0 == read_index%2) {
					seq_ptr[read_index/2] = 0;
				}
				else if(0 == strand && 1 == read_index%2) {
					seq_ptr[read_index/2] = 0;
				}
				// DEBUG
				/*
				   fprintf(stderr, "read_index=%d base=%d\n",
				   read_index, __node_base(heap->nodes[sw_node_cur_i].node));
				   */
				seq_ptr[read_index/2] |= int_to_nt4bit[__node_base(heap->nodes[sw_node_cur_i].node)] << 4*(1-(read_index%2));
				if(1 == strand) {
					read_index++;
				}
				else {
					read_index--;
				}
			}
			if(cigar_prev_op != cigar_cur_op) {
				// add the previous cigar operator
				if(-1 != cigar_prev_op) {
					bam1_cigar(bam_new)[cigar_i] = (cigar_prev_length << BAM_CIGAR_SHIFT) | cigar_prev_op;
					if(1 == strand) { // reverse strand
						cigar_i--;
					}
					else {
						cigar_i++;
					}
				}

				// update the previous cigar operator
				cigar_prev_op = cigar_cur_op;
				if(cigar_cur_op == BAM_CDEL) {
					// deletion length: reference bases skipped between the
					// previous and the current node
					cigar_prev_length = (int32_t)sw_align_position_gap(heap->nodes[sw_node_cur_i].node->position, heap->nodes[sw_node_prev_i].node->position) - 1;
				}
				else {
					cigar_prev_length = 1;
				}
			}
			else {
				cigar_prev_length++;
			}
			// Update
			if(BAM_CDEL != cigar_cur_op) {
				sw_node_prev_i = sw_node_cur_i;
				sw_node_cur_i = heap->nodes[sw_node_cur_i].prev_i;
			}
		}
		// flush the last pending cigar operator
		if(0 < cigar_prev_length) {
			if(-1 == cigar_prev_op || BAM_CDEL == cigar_prev_op) {
				srma_error(__func__, "Alignment ended with a null cigar or a deletion", Exit, OutOfRange);
			}
			bam1_cigar(bam_new)[cigar_i] = (cigar_prev_length << BAM_CIGAR_SHIFT) | cigar_prev_op;
			// DEBUG
			if(1 == strand) { // reverse strand
				assert(cigar_i == 0);
			}
			else {
				assert(cigar_i == bam_new->core.n_cigar-1);
			}
		}
	}

	{ // qualities
		uint8_t *qual_ptr = NULL;
		char qual, q1, q2;
		uint8_t prev_base = 0, next_base;

		bam_new->data_len += bam_new->core.l_qseq;
		sw_align_bam_alloc_data(bam_new, bam_new->data_len);
		qual_ptr = bam1_qual(bam_new);

		if(space == SRMA_SPACE_CS) {
			// compare the colors implied by the new base sequence with the
			// observed colors; '-' marks agreement, otherwise the observed
			// color is recorded
			color_errors = srma_malloc(sizeof(char)*(1 + bam_new->core.l_qseq), __func__, "color_errors");
			prev_base = nt2int_table[(int)colors[0]];
			for(i=0;i<bam_new->core.l_qseq;i++) {
				if(0 == strand) {
					next_base = nt4bit_to_int[bam1_seqi(bam1_seq(bam_new), i)];
				}
				else {
					next_base = nt4bit_to_int[bam1_seqi(bam1_seq(bam_new), bam_new->core.l_qseq-i-1)];
					if(next_base < 4) next_base = 3 - next_base;
				}
				if((prev_base ^ next_base) == nt2int_table[(int)colors[i+1]]) {
					color_errors[i] = '-';
				}
				else {
					color_errors[i] = colors[i+1];
				}
				prev_base = next_base;
			}
			color_errors[i]='\0';

			// Get new base qualities based on color qualities
			for(i=0;i<bam_new->core.l_qseq;i++) {
				// use MAQ 0.7.1 conversion
				if(i == bam_new->core.l_qseq-1) { 
					qual = srma_char2qual(color_qualities[i]);
				}
				else {
					int m1, m2;
					if(0 == strand) { // forward
						m1 = ('-' == color_errors[i]) ? 1 : 0;
						m2 = ('-' == color_errors[i+1]) ? 1 : 0;
						q1 = color_qualities[i];
						q2 = color_qualities[i+1];
					}
					else {
						m1 = ('-' == color_errors[bam_new->core.l_qseq-i-1]) ? 1 : 0;
						m2 = ('-' == color_errors[bam_new->core.l_qseq-i-2]) ? 1 : 0;
						q1 = color_qualities[bam_new->core.l_qseq-i-1];
						q2 = color_qualities[bam_new->core.l_qseq-i-2];
					}
					if(1 == m1 && 1 == m2) {
						qual = srma_char2qual(q1) + srma_char2qual(q2) + 10; 
					}
					else if(1 == m1) {
						qual = srma_char2qual(q1) - srma_char2qual(q2);
					}
					else if(1 == m2) {
						qual = srma_char2qual(q2) - srma_char2qual(q1);
					}
					else {
						qual = 1;
					}
				}
				if(0 == strand) {
					bam1_qual(bam_new)[i] = __bound_qual(qual);
				}
				else {
					bam1_qual(bam_new)[bam_new->core.l_qseq-i-1] = __bound_qual(qual);
				}
			}
		}
		else if(1 == correct_bases) {
			// Get new base qualities
			for(i=0;i<bam_new->core.l_qseq;i++) {
				if(bam1_seqi(bam1_seq(bam_new), i) == bam1_seqi(bam1_seq(bam_old), i)) {
					bam1_qual(bam_new)[i] = bam1_qual(bam_old)[i];
				}
				else {
					qual = srma_char2qual(bam1_qual(bam_old)[i]) -  33; 
					bam1_qual(bam_new)[i] = srma_qual2char(__bound_qual(qual - SRMA_CORRECT_BASE_QUALITY_PENALTY));
				}
			}
		}
		else {
			// Copy old quality
			memcpy(bam1_qual(bam_new), bam1_qual(bam_old), bam_new->core.l_qseq);
		}
	}

	// TODO soft-clipping

	{ // Add in any auxiliary data as necessary
		uint8_t *s;
		int32_t i = 0;
		bam_new->l_aux = 0;
			
		while(NULL != sw_align_save_tags[i]) {
			__copy_old(sw_align_save_tags[i]);
			i++;
		}
		
		// TODO 
		// PG

		// TODO: is AS correct
		bam_aux_append(bam_new, "AS", 'i', sizeof(uint32_t), (uint8_t*)&heap->nodes[sw_node_best_i].score);
		if(1 == correct_bases) {
			// keep the original bases (XO) and qualities (XQ) as strings
			int32_t l = bam_old->core.l_qseq;
			char *str;

			str = srma_malloc(sizeof(char)*(l+1), __func__, "seq");
			for(i=0;i<l;i++) {
				str[i] = bam_nt16_rev_table[bam1_seqi(bam1_seq(bam_old), i)];
			}
			str[i] = '\0';
			bam_aux_append(bam_new, "XO", 'Z', l+1, (uint8_t*)str);

			for(i=0;i<l;i++) {
				str[i] = bam1_qual(bam_old)[i] + 33;  
			}
			str[i] = '\0';
			bam_aux_append(bam_new, "XQ", 'Z', l+1, (uint8_t*)str);

			free(str);
		}
		bam_aux_append(bam_new, "XC", 'i', sizeof(uint32_t), (uint8_t*)&heap->nodes[sw_node_best_i].coverage_sum);
		if(space == SRMA_SPACE_CS) {
			bam_aux_append(bam_new, "XE", 'Z', bam_new->core.l_qseq+1, (uint8_t*)color_errors);
		}
	}

	// destroy the old bam structure
	bam_destroy1(bam_old);
		
	if(space == SRMA_SPACE_CS) {
		free(color_errors);
	}

	return bam_new;
}