Beispiel #1
0
/*
 * Convert alignments against a padded reference into alignments against the
 * unpadded reference, copying every record from `in` to `out`.
 *
 * A record at position 0 whose query name equals its target name is treated
 * as the embedded (padded) reference: its unpadded form is kept in `r` and a
 * padded->unpadded coordinate table (`posmap`) is rebuilt from it.  Every
 * other record is compared base-by-base with `r` to derive a new CIGAR.
 *
 * NOTE(review): a non-reference record seen before any embedded reference
 * would index `r.s` / `posmap` while they are still NULL - the input is
 * presumably required to list each reference record first; confirm.
 *
 * Always returns 0.
 */
int bam_pad2unpad(bamFile in, bamFile out)
{
	bam_header_t *h;
	bam1_t *b;
	kstring_t r, q;
	uint32_t *cigar2 = 0;
	int n2 = 0, m2 = 0, *posmap = 0;

	h = bam_header_read(in);
	bam_header_write(out, h);
	b = bam_init1();
	r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
	while (bam_read1(in, b) >= 0) {
		uint32_t *cigar = bam1_cigar(b);
		n2 = 0; // start a fresh CIGAR in cigar2 for this record
		if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam1_qname(b), h->target_name[b->core.tid]) == 0) {
			// Embedded reference record: its CIGAR becomes one M spanning the whole SEQ.
			int i, k;
			unpad_seq(b, &r);
			write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH));
			replace_cigar(b, n2, cigar2);
			posmap = realloc(posmap, r.m * sizeof(int));
			// k counts the real (non-pad) reference bases seen so far.
			for (i = k = 0; i < r.l; ++i) {
				posmap[i] = k; // note that a read should NOT start at a padding
				if (r.s[i]) ++k;
			}
		} else {
			int i, k, op;
			unpad_seq(b, &q);
			// A leading soft clip is carried over unchanged.
			if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[0]);
			// Classify every padded column: read base/gap versus reference base/pad.
			for (i = 0, k = b->core.pos; i < q.l; ++i, ++k)
				q.s[i] = q.s[i]? (r.s[k]? BAM_CMATCH : BAM_CINS) : (r.s[k]? BAM_CDEL : BAM_CPAD);
			// Run-length encode the per-column operators.
			for (i = k = 1, op = q.s[0]; i < q.l; ++i) {
				if (op != q.s[i]) {
					write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
					op = q.s[i]; k = 1;
				} else ++k;
			}
			write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
			// A trailing soft clip is carried over unchanged.
			if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CSOFT_CLIP) write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]);
			// Collapse M-P-M into a single M; the emptied slots are zeroed and dropped below.
			for (i = 2; i < n2; ++i)
				if (bam_cigar_op(cigar2[i]) == BAM_CMATCH && bam_cigar_op(cigar2[i-1]) == BAM_CPAD && bam_cigar_op(cigar2[i-2]) == BAM_CMATCH)
					cigar2[i] += cigar2[i-2], cigar2[i-2] = cigar2[i-1] = 0;
			for (i = k = 0; i < n2; ++i)
				if (cigar2[i]) cigar2[k++] = cigar2[i];
			n2 = k;
			replace_cigar(b, n2, cigar2);
			b->core.pos = posmap[b->core.pos];
		}
		bam_write1(out, b);
	}
	// cigar2 is the scratch CIGAR buffer grown via write_cigar; release it with the rest.
	free(r.s); free(q.s); free(posmap); free(cigar2);
	bam_destroy1(b);
	bam_header_destroy(h);
	return 0;
}
Beispiel #2
0
/*
 * Sum the lengths of all CIGAR operations that either consume the query
 * (bit 0 of bam_cigar_type) or are hard clips.
 *
 * n_cigar  number of entries in `cigar`
 * cigar    packed BAM CIGAR operations
 *
 * Returns the accumulated length.
 */
int bam_cigar2ulen(int n_cigar, const uint32_t *cigar)
{
    int total = 0;
    int idx = 0;

    while (idx < n_cigar) {
        const uint32_t elem = cigar[idx++];
        const int op = bam_cigar_op(elem);

        if (op == BAM_CHARD_CLIP || (bam_cigar_type(op) & 1))
            total += bam_cigar_oplen(elem);
    }
    return total;
}
Beispiel #3
0
/*
 * Sum the lengths of all CIGAR operations whose bam_cigar_type is exactly 3,
 * i.e. operations that consume both the query and the reference.
 *
 * n_cigar  number of entries in `cigar`
 * cigar    packed BAM CIGAR operations
 */
int bam_cigar2matches(int n_cigar, const uint32_t *cigar)
{
    int matched = 0;
    int idx;

    for (idx = 0; idx < n_cigar; ++idx) {
        const uint32_t elem = cigar[idx];

        if (bam_cigar_type(bam_cigar_op(elem)) != 3)
            continue;
        matched += bam_cigar_oplen(elem);
    }
    return matched;
}
Beispiel #4
0
// Build a parser positioned on the first CIGAR operation.
//
// cigar  packed BAM CIGAR array; the pointer is stored in cigar_ (not copied)
// len    value stored in len_; advance() asserts the operation index stays below it
//
// Read and reference offsets start at 0 and the current operation/length are
// decoded from the first array element.
// NOTE(review): *cigar is dereferenced unconditionally, so this reads the
// first element even when len is 0 - confirm callers never pass an empty CIGAR.
CigarParser::CigarParser(uint32_t const* cigar, int len)
    : cigar_(cigar)
    , len_(len)
    , readPos_(0)
    , refPos_(0)
    , currentOpIdx_(0)
    , currentOp_(bam_cigar_op(*cigar))
    , currentOpLen_(bam_cigar_oplen(*cigar))
    , started_(false)
{
}
/* Finds InDels in a BAM or CRAM file, adding them to the linked list
 *
 * fp       Input BAM/CRAM file
 * hdr      The header for the BAM/CRAM file
 * minMAPQ  Alignments with a mapping quality below this value are skipped
 * k        The K-mer size
 *
 * discussion The linked list will need to be destroyed with destroyNodes()
 */
void findInDels(htsFile *fp, bam_hdr_t *hdr, int minMAPQ, int k) {
    bam1_t *b = bam_init1();
    int i, op;
    InDel *node;
    uint32_t *cigar;

    //sam_read1() reports success with any value >= 0 (-1 is end of stream)
    while(sam_read1(fp, hdr, b) >= 0) {
        if(b->core.qual < minMAPQ) continue;
        cigar = bam_get_cigar(b);
        for(i=0; i<b->core.n_cigar; i++) {
            op = bam_cigar_op(cigar[i]);
            if(op == 1 || op == 2) { //1 == I, 2 == D
                node = makeNode(b, i);
                if(node == NULL) goto quit;
                insertNode(node, k);
                while(++i < b->core.n_cigar) { //Skip adjacent D/I operations
                    op = bam_cigar_op(cigar[i]);
                    if(op != 1 && op != 2) break;
                }
            }
        }
    }

    //Ensure that all ROIs are at least k apart
    //The guard on lastTargetNode covers the case where no InDel was inserted
    lastTargetNode = firstTargetNode->next;
    while(lastTargetNode && lastTargetNode->next) {
        i = TargetNodeCmp(lastTargetNode,lastTargetNode->next, k);
        assert(i<=0);
        if(i==0) {
            //Merge the next node into this one
            lastTargetNode->end = lastTargetNode->next->end;
            //NOTE(review): looks like an attempt at a saturating add of the two counts - confirm against the type of count
            lastTargetNode->count += (lastTargetNode->count+lastTargetNode->next->count > lastTargetNode->count)?lastTargetNode->next->count:0xFFFFFFFF;
            removeNode(lastTargetNode->next);
        } else {
            lastTargetNode = lastTargetNode->next;
        }
    }

quit:
    bam_destroy1(b);
}
Beispiel #6
0
// Applies the configured filters to one alignment.
// Returns 0 when the read should be output and 1 when it should be dropped.
// Side effects on `b`: B operators are removed first when settings->remove_B
// is set, and the aux tags listed in settings->remove_aux are deleted from
// reads that pass every filter.
static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settings)
{
    if (settings->remove_B) bam_remove_B(b);

    // Minimum query length, counting query-consuming operations and hard clips
    if (settings->min_qlen > 0) {
        const uint32_t *ops = bam_get_cigar(b);
        int query_len = 0;
        int idx;
        for (idx = 0; idx < b->core.n_cigar; ++idx) {
            const int op = bam_cigar_op(ops[idx]);
            if (op != BAM_CHARD_CLIP && !(bam_cigar_type(op) & 1)) continue;
            query_len += bam_cigar_oplen(ops[idx]);
        }
        if (query_len < settings->min_qlen) return 1;
    }

    // Mapping quality and required / forbidden flag bits
    if (b->core.qual < settings->min_mapQ) return 1;
    if ((b->core.flag & settings->flag_on) != settings->flag_on) return 1;
    if (b->core.flag & settings->flag_off) return 1;

    // Region filter
    if (settings->bed) {
        if (b->core.tid < 0) return 1;
        if (!bed_overlap(settings->bed, h->target_name[b->core.tid], b->core.pos, bam_endpos(b))) return 1;
    }

    // Subsampling keyed on a hash of the read name
    if (settings->subsam_frac > 0.) {
        uint32_t hashed = __ac_Wang_hash(__ac_X31_hash_string(bam_get_qname(b)) ^ settings->subsam_seed);
        if ((double)(hashed&0xffffff) / 0x1000000 >= settings->subsam_frac) return 1;
    }

    // Read-group filter; reads without an RG tag are not rejected here
    if (settings->rghash) {
        uint8_t *rg_tag = bam_aux_get(b, "RG");
        if (rg_tag && kh_get(rg, settings->rghash, (char*)(rg_tag + 1)) == kh_end(settings->rghash))
            return 1;
    }

    // Library filter
    if (settings->library) {
        const char *lib = bam_get_library((bam_hdr_t*)h, b);
        if (!lib) return 1;
        if (strcmp(lib, settings->library) != 0) return 1;
    }

    // Strip the requested aux tags from the surviving read
    {
        size_t t;
        for (t = 0; t < settings->remove_aux_len; ++t) {
            uint8_t *aux = bam_aux_get(b, settings->remove_aux[t]);
            if (aux) bam_aux_del(b, aux);
        }
    }
    return 0;
}
Beispiel #7
0
// Adapted from samtools/bam.c
// Returns the summed length of every CIGAR operation of `b` whose operation
// character is 'S'.
int32_t b2g_bam_clippedlength(bam1_t *b) {
  const uint32_t *ops = bam1_cigar(b);
  const uint32_t n_ops = b->core.n_cigar;
  int32_t clipped = 0;
  uint32_t idx = 0;

  while (idx < n_ops) {
    const uint32_t elem = ops[idx++];
    if (bam_cigar_opchr(bam_cigar_op(elem)) != 'S') {
      continue;
    }
    clipped += bam_cigar_oplen(elem);
  }
  return clipped;
}
/* Make a node containing an InDel
 *
 * b     The input read
 * cigar_op_num The operation number of the Insertion/Deletion
 *
 * returns a node, that must be either inserted into the linked list or
 * destroyed with destroyNode(). NULL is returned if memory could not be
 * allocated.
 */
InDel *makeNode(bam1_t *b, int cigar_op_num) {
    int i, op, oplen, type;
    int32_t start = b->core.pos-1;
    int32_t end;
    uint32_t *cigar = bam_get_cigar(b);
    InDel *node;

    //Advance start over every reference-consuming operation before the InDel
    for(i=0; i<cigar_op_num; i++) {
        oplen = bam_cigar_oplen(cigar[i]);
        type = bam_cigar_type(bam_cigar_op(cigar[i]));
        if(type & 2) start += oplen;
    }

    //Extend end over the run of adjacent I/D operations
    end = ++start;
    for(i=cigar_op_num; i<b->core.n_cigar; i++) {
        op = bam_cigar_op(cigar[i]);
        if(op != 1 && op != 2) break; //1 == I, 2 == D
        oplen = bam_cigar_oplen(cigar[i]);
        end = (end>start+oplen) ? end : start+oplen;
    }

    //Make the node
    node = calloc(1, sizeof *node);
    if(node == NULL) return NULL; //The caller already treats NULL as failure
    node->tid = b->core.tid;
    node->start = start;
    node->end = end;
    node->count = 1;

    return node;
}
Beispiel #9
0
// Step to the next CIGAR operation.
//
// The read and reference offsets are first moved past the current operation
// according to which of the two it consumes; the operation index is then
// incremented and the new operation code and length are decoded.
// The assert fires if the new index is not below len_.
void CigarParser::advance() {
    int const consumed = bam_cigar_type(currentOp_);

    if (consumed & BAM_CONSUME_QUERY)
        readPos_ += currentOpLen_;
    if (consumed & BAM_CONSUME_REFERENCE)
        refPos_ += currentOpLen_;

    currentOpIdx_ += 1;
    assert(currentOpIdx_ < len_);

    currentOpLen_ = bam_cigar_oplen(cigar_[currentOpIdx_]);
    currentOp_ = bam_cigar_op(cigar_[currentOpIdx_]);
}
Beispiel #10
0
/*
 * Expand the aligned part of read `b` into `s`, one entry per padded column:
 * the 4-bit base code for each M column and 0 for each D column.  Soft-clipped
 * bases are skipped.  Only M, D and S operations are expected (asserted).
 *
 * On return s->l is the number of columns written.
 */
static void unpad_seq(bam1_t *b, kstring_t *s)
{
	int k, j, i, need = 0;
	uint32_t *cigar = bam1_cigar(b);
	uint8_t *seq = bam1_seq(b);

	// The output holds one entry per non-soft-clip column, which can exceed
	// l_qseq when the read has deletions; size the buffer for whichever is larger.
	for (k = 0; k < b->core.n_cigar; ++k)
		if (bam_cigar_op(cigar[k]) != BAM_CSOFT_CLIP) need += bam_cigar_oplen(cigar[k]);
	if (need < b->core.l_qseq) need = b->core.l_qseq;
	ks_resize(s, need);

	for (k = 0, s->l = 0, j = 0; k < b->core.n_cigar; ++k) {
		int op, ol;
		op = bam_cigar_op(cigar[k]);
		ol = bam_cigar_oplen(cigar[k]);
		assert(op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CSOFT_CLIP);
		if (op == BAM_CMATCH) {
			// j must advance with every copied base, not once per operation
			for (i = 0; i < ol; ++i, ++j) s->s[s->l++] = bam1_seqi(seq, j);
		} else if (op == BAM_CSOFT_CLIP) {
			j += ol;
		} else {
			for (i = 0; i < ol; ++i) s->s[s->l++] = 0;
		}
	}
}
Beispiel #11
0
/*
 * Expand the aligned part of read `b` into `s`, one entry per reference
 * column: the 4-bit base code for M/=/X columns and 0 for D (and N) columns.
 *
 * Returns 0 on success and non-zero on error: -1 when an unexpected CIGAR
 * operation is met, 1 when the number of columns written differs from the
 * reference length computed from the CIGAR.
 */
static int unpad_seq(bam1_t *b, kstring_t *s)
{
    uint32_t *ops = bam_get_cigar(b);
    uint8_t *bases = bam_get_seq(b);
    int warned_ref_skip = 0; /* Make this a global and limit to one CIGAR N warning? */
    int op_idx, base_idx, n;
    int expected_len;

    // l_qseq covers the whole SEQ (soft clips included); what is needed here
    // is the padded aligned length from the CIGAR, which excludes soft clips
    // but includes the columns contributed by D operations.
    expected_len = bam_cigar2rlen(b->core.n_cigar, ops);
    ks_resize(s, expected_len);

    s->l = 0;
    base_idx = 0;
    for (op_idx = 0; op_idx < b->core.n_cigar; ++op_idx) {
        int op = bam_cigar_op(ops[op_idx]);
        int run = bam_cigar_oplen(ops[op_idx]);

        switch (op) {
        case BAM_CMATCH:
        case BAM_CEQUAL:
        case BAM_CDIFF:
            for (n = 0; n < run; ++n, ++base_idx)
                s->s[s->l++] = bam_seqi(bases, base_idx);
            break;
        case BAM_CSOFT_CLIP:
            base_idx += run;
            break;
        case BAM_CHARD_CLIP:
            /* nothing stored in SEQ, nothing to emit */
            break;
        case BAM_CDEL:
            for (n = 0; n < run; ++n)
                s->s[s->l++] = 0;
            break;
        case BAM_CREF_SKIP:
            /* CIGAR N is handled like D (not ideal, but better than ignoring it) */
            for (n = 0; n < run; ++n)
                s->s[s->l++] = 0;
            if (0 == warned_ref_skip) {
                warned_ref_skip = -1;
                fprintf(stderr, "[depad] WARNING: CIGAR op N treated as op D in read %s\n", bam_get_qname(b));
            }
            break;
        default:
            fprintf(stderr, "[depad] ERROR: Didn't expect CIGAR op %c in read %s\n", BAM_CIGAR_STR[op], bam_get_qname(b));
            return -1;
        }
    }
    return expected_len != s->l;
}
Beispiel #12
0
// Adds the coverage of one alignment to wiggle.read_depth.
//
// The weight is 1.0 when no_fractional_weight is set; otherwise it is read
// from the record's "ZW" aux tag, and records without that tag are ignored.
// Every reference position covered by an M operation gets the weight added;
// other reference-consuming operations only move the position forward.
void add_bam_record_to_wiggle(const bam1_t *b, Wiggle& wiggle) {
    double w = 1.0;

    if (!no_fractional_weight) {
      uint8_t *zw_tag = bam_aux_get(b, "ZW");
      if (zw_tag == NULL) return;
      w = bam_aux2f(zw_tag);
    }

    const uint32_t *cigar = bam_get_cigar(b);
    const int n_ops = (int)b->core.n_cigar;
    int ref_pos = b->core.pos;

    for (int idx = 0; idx < n_ops; ++idx) {
      const char op = bam_cigar_op(cigar[idx]);
      const int span = bam_cigar_oplen(cigar[idx]);

      if (op != BAM_CMATCH) {
        if (bam_cigar_type(op) & 2) ref_pos += span;
        continue;
      }

      for (const int stop = ref_pos + span; ref_pos < stop; ++ref_pos)
        wiggle.read_depth[ref_pos] += w;
    }
}
Beispiel #13
0
/*
 * Compute the query and reference intervals covered by the aligned zone of a
 * binary CIGAR.
 *
 * cigar_bin / cigar_bin_sz  packed CIGAR operations and their count
 * forward                   when zero, the query interval is mirrored within qry_len
 * qry_len                   query length, used only for that mirroring
 * q_beg, q_end              out: query interval (leading clips advance q_beg)
 * r_beg, r_end              out: reference interval
 *
 * Scanning stops at the first clip that follows an aligned operation.
 * Returns the summed length of all operations visited before that point
 * (leading clips included).
 */
unsigned alignment_bounds_from_bin_cigar (uint32_t* cigar_bin, unsigned cigar_bin_sz, uint8_t forward, unsigned qry_len, unsigned* q_beg, unsigned* q_end, unsigned* r_beg, unsigned* r_end)
{
    unsigned oplen, op, constype;
    uint32_t *sent;
    *q_beg = *q_end = *r_beg = *r_end = 0;
    unsigned allen = 0;
    uint32_t in_alignment = 0; // becomes 1 once the first non-clip operation is seen
    for (sent = cigar_bin + cigar_bin_sz; cigar_bin != sent; ++cigar_bin)
    {
        oplen = bam_cigar_oplen (*cigar_bin);
        op = bam_cigar_op (*cigar_bin);
        // bam_cigar_type expects the operation code, not the packed length|op word
        constype = bam_cigar_type (op);

        if (in_alignment && (op == BAM_CHARD_CLIP || op == BAM_CSOFT_CLIP)) // the aligned zone ended, clip started. Note that tail indels are not valid, so we do not assume they are possible..
            break;

        if (op != BAM_CHARD_CLIP && op != BAM_CSOFT_CLIP)
            in_alignment = 1;

        if (constype & CONSUME_QRY)
        {
            if (!in_alignment) *q_beg += oplen;
            *q_end += oplen;
        }
        if (constype & CONSUME_REF)
        {
            if (!in_alignment) *r_beg += oplen;
            *r_end += oplen;
        }
        allen += oplen;
    }
    if (!forward)
    {
        // express the query interval relative to the opposite end of the query
        unsigned tmp = qry_len - *q_beg;
        *q_beg = qry_len - *q_end;
        *q_end = tmp;
    }
    return allen;
}
Beispiel #14
0
void cigar_print (FILE* f, uint32_t* cigar, unsigned cigar_sz)
{
    uint32_t* sent;
    for (sent = cigar+cigar_sz; cigar != sent; ++cigar)
    {
        uint32_t curop = bam_cigar_op (*cigar);
        uint32_t count = bam_cigar_oplen (*cigar);
        char schar;
        switch (curop)
        {
        case BAM_CHARD_CLIP:
            schar = 'H';
            break;
        case BAM_CSOFT_CLIP: // skip
            schar = 'S';
            break;
        case BAM_CMATCH:
            schar = 'M';
            break;
        case BAM_CEQUAL:
            schar = '=';
            break;
        case BAM_CDIFF:
            schar = '#';
            break;
        case BAM_CINS:
            schar = 'I';
            break;
        case BAM_CDEL:
            schar = 'I';
            break;
        default:
            schar = '?';
        }
        fprintf (f, "%d%c", count, schar);
    }
}
Beispiel #15
0
/*
 * Replace the first character of the ZS value with `new_strand` and record the
 * previous character in a new single-character "OS" tag.
 *
 * The old character is copied to a local first: bam_aux_append() may
 * reallocate b->data, so it must neither be handed a pointer into that buffer
 * nor be followed by a write through such a pointer.
 */
static void bsstrand_relabel(bam1_t *b, unsigned char *bsstrand, char new_strand)
{
	uint8_t previous = bsstrand[0];
	bsstrand[0] = (unsigned char) new_strand;
	bam_aux_append(b, "OS", 'A', 1, &previous);
}

/*
 * Per-read callback: count C>T and G>A conversions over the matched columns
 * of the alignment and use them to validate, correct or infer the "ZS"
 * bisulfite strand tag, then write the read to `out` (when non-NULL).
 *
 * Unmapped reads are written through unchanged and counted in n_unmapped.
 * Reads with a ZS tag may have it set to '?' (ambiguous, counted in n_fail)
 * or flipped (counted in n_corr), with the old value saved in an OS tag.
 * Reads without a ZS tag get one inferred when conf->infer_bsstrand is set.
 * CIGAR operations other than M, I, D and S abort the program.
 *
 * Always returns 0.
 */
int bsstrand_func(bam1_t *b, const samfile_t *in, samfile_t *out, void *data) {

	bsstrand_data_t *d = (bsstrand_data_t*)data;
	bsstrand_conf_t *conf = d->conf;
	const bam1_core_t *c = &b->core;

	if (c->flag & BAM_FUNMAP){
		if (out) samwrite(out, b);
		d->n_unmapped++;
		return 0;
	}
	
	/* NOTE(review): only [pos, pos+1) is requested here although bases are read
	 * across the whole alignment below - presumably the helper loads a wider
	 * window; confirm. */
	fetch_refseq(d->rs, in->header->target_name[c->tid], c->pos, c->pos+1);
	uint32_t rpos=c->pos+1, qpos=0;
	int i, nC2T = 0, nG2A = 0;
	uint32_t j;
	char rbase, qbase;

	for (i=0; i<c->n_cigar; ++i) {
		uint32_t op = bam_cigar_op(bam1_cigar(b)[i]);
		uint32_t oplen = bam_cigar_oplen(bam1_cigar(b)[i]);
		switch(op) {
		case BAM_CMATCH:
			for(j=0; j<oplen; ++j) {
				rbase = toupper(getbase_refseq(d->rs, rpos+j));
				qbase = bscall(bam1_seq(b), qpos+j);
				if (rbase == 'C' && qbase == 'T') nC2T += 1;
				if (rbase == 'G' && qbase == 'A') nG2A += 1;
			}
			rpos += oplen;
			qpos += oplen;
			break;
		case BAM_CINS:
			qpos += oplen;
			break;
		case BAM_CDEL:
			rpos += oplen;
			break;
		case BAM_CSOFT_CLIP:
			qpos += oplen;
			break;
		default:
			fprintf(stderr, "Unknown cigar, %u\n", op);
			abort();
		}
	}

	char key[2] = {'Z','S'};
	unsigned char *bsstrand = bam_aux_get(b, key);
	if (bsstrand) {
		bsstrand++; /* step over the type byte to the tag value */
		double s = similarity(nG2A, nC2T);
		if (nG2A > 1 && nC2T > 1 && s > 0.5) {
			/* both conversion types present in similar amounts: strand is ambiguous */
			if (conf->output_read || conf->output_all_read)
				printf("F\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bsstrand_relabel(b, bsstrand, '?');
			d->n_fail++;
		} else if (*bsstrand == '+' && nG2A > nC2T + 2) {
			if (conf->output_read || conf->output_all_read)
				printf("W2C\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bsstrand_relabel(b, bsstrand, '-');
			d->n_corr++;
		} else if (*bsstrand == '-' && nC2T > nG2A + 2) {
			if (conf->output_read || conf->output_all_read)
				printf("C2W\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bsstrand_relabel(b, bsstrand, '+');
			d->n_corr++;
		} else if (conf->output_all_read) {
			printf("N\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
		}
	} else if (!(c->flag & BAM_FUNMAP) && conf->infer_bsstrand) {
		/* no ZS tag: infer one from the conversion counts and read orientation */
		char bss[3];
		if (similarity(nG2A, nC2T) < 0.5) {
			strcpy(bss, "??");
		} else if (nC2T > nG2A) {
			strcpy(bss, c->flag & BAM_FREVERSE ? "+-" : "++");
		} else {
			strcpy(bss, c->flag & BAM_FREVERSE ? "-+" : "--");
		}
		bam_aux_append(b, "ZS", 'Z', 3, (uint8_t*) bss);
	}

	
	if (out) samwrite(out, b);
	d->n_mapped++;

	return 0;
}
Beispiel #16
0
// Prepare internal structures for clipping and alignment, then realign.
// Returns true if realignment was performed; false otherwise, in which case
// one of the status flags below tells why.
//
// Inputs:
//   q_seq, q_len       query bases and their count
//   r_seq              reference bases for the aligned zone
//   r_len              not used in this function body
//   r_pos              original alignment position; returned in new_pos when no shift applies
//   forward            orientation, passed on to SetClipping and SetSequences
//   cigar, cigar_sz    original packed CIGAR (must be non-empty, asserted)
// Outputs:
//   cigar_dest, cigar_dest_sz  newly allocated packed CIGAR and its size; written only on success
//                              (allocated with tmap_malloc - presumably owned by the caller; confirm)
//   new_pos                    alignment position after realignment; written only on success
//   already_perfect            set when ClipAnchors reports nothing to realign
//   clip_failed                passed to ClipAnchors; not otherwise written here
//   alignment_failed           set when computeSWalignment fails
//   unclip_failed              set when addClippedBasesToTags fails
bool RealignImp::compute_alignment (
    const char* q_seq,
    unsigned q_len,
    const char* r_seq, 
    unsigned r_len,
    int r_pos, 
    bool forward, 
    const uint32_t* cigar, 
    unsigned cigar_sz, 
    uint32_t*& cigar_dest, 
    unsigned& cigar_dest_sz, 
    int& new_pos,
    bool& already_perfect,
    bool& clip_failed,
    bool& alignment_failed,
    bool& unclip_failed)
{
    // NOTE(review): clip_failed is not reset here, unlike the other three flags - confirm ClipAnchors always assigns it
    already_perfect = false;
    alignment_failed = false;
    unclip_failed = false;
    unsigned oplen;

    // Views of the query / CIGAR with the soft-clipped ends stripped off
    const char* q_seq_clipped = q_seq;
    const uint32_t* cigar_clipped = cigar;
    unsigned cigar_sz_clipped = cigar_sz;

    unsigned sclip_q_len, sclip_r_len, sclip_al_len;

    assert (cigar_sz);
    // reset realigner
    Reset ();

    // set clipping
    SetClipping ((int) cliptype_, forward);

    // Strip a leading soft clip: record it with ClipStart and skip past the
    // clipped query bases and the clip operation itself.
    // NOTE(review): only soft clips are examined here; a hard clip at either end is left in place.
    if (bam_cigar_op (*cigar) == BAM_CSOFT_CLIP)
    {
        oplen = bam_cigar_oplen (*cigar);
        ClipStart (oplen);
        q_seq_clipped += oplen;
        ++cigar_clipped;
        --cigar_sz_clipped;
    }

    // Strip a trailing soft clip (checked only when there is more than one operation)
    if (cigar_sz > 1 && bam_cigar_op (cigar [cigar_sz - 1]) == BAM_CSOFT_CLIP)
    {
        oplen = bam_cigar_oplen (cigar [cigar_sz - 1]);
        ClipEnd (oplen);
        --cigar_sz_clipped;
    }

    // cigar defines q_seq and t_seq lengths
    sclip_al_len = seq_lens_from_bin_cigar (cigar_clipped, cigar_sz_clipped, &sclip_q_len, &sclip_r_len);

    const std::string query (q_seq_clipped, sclip_q_len);
    const std::string target (r_seq, sclip_r_len);
    std::string pretty_al; pretty_al.reserve (sclip_al_len);

    pretty_al_from_bin_cigar (cigar_clipped, cigar_sz_clipped, q_seq_clipped, r_seq, pretty_al);

    // Realigner requires strings of proper size to be passed to SetSequences
    SetSequences (query, target, pretty_al, forward);

    if (!ClipAnchors (clip_failed))
    {
        already_perfect = true;
        return false; // alignment already good, no imperfect zone to realign found
    }

    // TODO avoid automatic vectors to prevent unneeded heap usage
    vector<MDelement> new_md_vec; 
    vector<CigarOp> new_cigar_vec;
    unsigned int start_pos_shift;

    if (!computeSWalignment(new_cigar_vec, new_md_vec, start_pos_shift))
    {
        alignment_failed = true;
        return false;
    }

    if (!addClippedBasesToTags(new_cigar_vec, new_md_vec, q_len))
    {
        unclip_failed = true;
        return false; // error adding back clipped out zones
    }

    // The position is recomputed only when the left anchor was not clipped and the alignment start shifted
    if (!LeftAnchorClipped () && start_pos_shift != 0) 
    {
        // build cigar data only if it is needed
        // TODO avoid automatic vectors to prevent unneeded heap usage
        std::vector <CigarOp> cigar_vec;
        cigar_vector_from_bin (cigar, cigar_sz, cigar_vec);
        new_pos = updateReadPosition (cigar_vec, start_pos_shift, r_pos);
    }
    else
        new_pos = r_pos;

    // free (cigar_dest);
    // TODO: switch to better alignment memory management, avoid heap operations
    // The previous value of cigar_dest is overwritten without being freed (see the disabled free above)
    cigar_dest = (uint32_t*) tmap_malloc (sizeof (uint32_t) * new_cigar_vec.size (), "cigar_dest");
    cigar_dest_sz = new_cigar_vec.size ();
    cigar_vector_to_bin (new_cigar_vec, cigar_dest);

    return true;
}
Beispiel #17
0
/*
 * Convert alignments against padded references into alignments against the
 * unpadded references, copying every record from `in` to `out`.
 *
 * in, out  input and output alignment files
 * h        header shared by both files
 * fai      optional FASTA index used to load (padded) references that are not
 *          embedded in the input, and to cross-check embedded ones; may be NULL
 *
 * A record at position 0 whose name equals its target name is treated as an
 * embedded reference.  For every other record with a CIGAR, a new CIGAR is
 * derived column-by-column against the current reference, and POS / MPOS are
 * translated through the padded->unpadded position table.
 *
 * Returns 0 on success, 1 if the input was truncated and -1 on any other
 * error.  All working buffers are released on every path.
 *
 * NOTE(review): the POS translation indexes `posmap` even for records without
 * a CIGAR; if no reference has been loaded yet the table is still NULL -
 * confirm this cannot happen for valid input.
 */
int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
{
    bam1_t *b = 0;
    kstring_t r, q;
    int r_tid = -1;
    uint32_t *cigar2 = 0;
    int ret = 0, n2 = 0, m2 = 0, *posmap = 0;

    b = bam_init1();
    r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
    int read_ret;
    while ((read_ret = sam_read1(in, h, b)) >= 0) { // read one alignment from `in'
        // Cannot depad unmapped CRAM data
        if (b->core.flag & BAM_FUNMAP)
            goto next_seq;

        uint32_t *cigar = bam_get_cigar(b);
        n2 = 0;
        if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam_get_qname(b), h->target_name[b->core.tid]) == 0) {
            // Embedded reference record
            r_tid = b->core.tid;
            if (0!=unpad_seq(b, &r)) {
                fprintf(stderr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in reference %s\n", bam_get_qname(b));
                ret = -1;
                goto depad_end;
            }
            if (h->target_len[r_tid] != r.l) {
                fprintf(stderr, "[depad] ERROR: (Padded) length of '%s' is %u in BAM header, but %llu in embedded reference\n", bam_get_qname(b), h->target_len[r_tid], (unsigned long long)(r.l));
                ret = -1;
                goto depad_end;
            }
            if (fai) {
                // Check the embedded reference matches the FASTA file
                if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &q)) {
                    fprintf(stderr, "[depad] ERROR: Failed to load embedded reference '%s' from FASTA\n", h->target_name[b->core.tid]);
                    ret = -1;
                    goto depad_end;
                }
                assert(r.l == q.l);
                int i;
                for (i = 0; i < r.l; ++i) {
                    if (r.s[i] != q.s[i]) {
                        // Show gaps as ASCII 45
                        fprintf(stderr, "[depad] ERROR: Embedded sequence and reference FASTA don't match for %s base %i, '%c' vs '%c'\n",
                            h->target_name[b->core.tid], i+1,
                            r.s[i] ? seq_nt16_str[(int)r.s[i]] : 45,
                            q.s[i] ? seq_nt16_str[(int)q.s[i]] : 45);
                        ret = -1;
                        goto depad_end;
                    }
                }
            }
            write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH));
            replace_cigar(b, n2, cigar2);
            posmap = update_posmap(posmap, r);
        } else if (b->core.n_cigar > 0) {
            int i, k, op;
            if (b->core.tid < 0) {
                fprintf(stderr, "[depad] ERROR: Read '%s' has CIGAR but no RNAME\n", bam_get_qname(b));
                ret = -1;
                goto depad_end;
            } else if (b->core.tid == r_tid) {
                ; // good case, reference available
            } else if (fai) {
                if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) {
                    fprintf(stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
                    ret = -1;
                    goto depad_end;
                }
                posmap = update_posmap(posmap, r);
                r_tid = b->core.tid;
            } else {
                fprintf(stderr, "[depad] ERROR: Missing %s embedded reference sequence (and no FASTA file)\n", h->target_name[b->core.tid]);
                ret = -1;
                goto depad_end;
            }
            if (0!=unpad_seq(b, &q)) {
                fprintf(stderr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in read %s\n", bam_get_qname(b));
                ret = -1;
                goto depad_end;
            }
            /* Carry leading clips (H, optionally followed by S) over unchanged */
            if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) {
                write_cigar(cigar2, n2, m2, cigar[0]);
            } else if (bam_cigar_op(cigar[0]) == BAM_CHARD_CLIP) {
                write_cigar(cigar2, n2, m2, cigar[0]);
                if (b->core.n_cigar > 2 && bam_cigar_op(cigar[1]) == BAM_CSOFT_CLIP) {
                    write_cigar(cigar2, n2, m2, cigar[1]);
                }
            }
            /* Determine CIGAR operator for each base in the aligned read */
            for (i = 0, k = b->core.pos; i < q.l; ++i, ++k)
                q.s[i] = q.s[i]? (r.s[k]? BAM_CMATCH : BAM_CINS) : (r.s[k]? BAM_CDEL : BAM_CPAD);
            /* Include any pads if starts with an insert */
            if (q.s[0] == BAM_CINS) {
                for (k = 0; k+1 < b->core.pos && !r.s[b->core.pos - k - 1]; ++k);
                if (k) write_cigar(cigar2, n2, m2, bam_cigar_gen(k, BAM_CPAD));
                k = 0;
            } else if (q.s[0] == BAM_CPAD) {
                // Join 'k' CPAD to our first cigar op CPAD too.
                for (k = 0; k+1 < b->core.pos && !r.s[b->core.pos - k - 1]; ++k);
            } else {
                k = 0;
            }
            /* Count consecutive CIGAR operators to turn into a CIGAR string */
            for (i = 1, k++, op = q.s[0]; i < q.l; ++i) {
                if (op != q.s[i]) {
                    write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
                    op = q.s[i]; k = 1;
                } else ++k;
            }
            write_cigar(cigar2, n2, m2, bam_cigar_gen(k, op));
            /* Carry trailing clips (optional S, then H) over unchanged */
            if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CSOFT_CLIP) {
                write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]);
            } else if (bam_cigar_op(cigar[b->core.n_cigar-1]) == BAM_CHARD_CLIP) {
                if (b->core.n_cigar > 2 && bam_cigar_op(cigar[b->core.n_cigar-2]) == BAM_CSOFT_CLIP) {
                    write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-2]);
                }
                write_cigar(cigar2, n2, m2, cigar[b->core.n_cigar-1]);
            }
            /* Remove redundant P operators between M/X/=/D operators, e.g. 5M2P10M -> 15M */
            int pre_op, post_op;
            for (i = 2; i < n2; ++i)
                if (bam_cigar_op(cigar2[i-1]) == BAM_CPAD) {
                    pre_op = bam_cigar_op(cigar2[i-2]);
                    post_op = bam_cigar_op(cigar2[i]);
                    /* Note don't need to check for X/= as code above will use M only */
                    if ((pre_op == BAM_CMATCH || pre_op == BAM_CDEL) && (post_op == BAM_CMATCH || post_op == BAM_CDEL)) {
                        /* This is a redundant P operator */
                        cigar2[i-1] = 0; // i.e. 0M
                        /* If had same operator either side, combine them in post_op */
                        if (pre_op == post_op) {
                            /* If CIGAR M, could treat as simple integers since BAM_CMATCH is zero*/
                            cigar2[i] = bam_cigar_gen(bam_cigar_oplen(cigar2[i-2]) + bam_cigar_oplen(cigar2[i]), post_op);
                            cigar2[i-2] = 0; // i.e. 0M
                        }
                    }
                }
            /* Remove the zero'd operators (0M) */
            for (i = k = 0; i < n2; ++i)
                if (cigar2[i]) cigar2[k++] = cigar2[i];
            n2 = k;
            replace_cigar(b, n2, cigar2);
        }
        /* Even unmapped reads can have a POS value, e.g. if their mate was mapped */
        if (b->core.pos != -1) b->core.pos = posmap[b->core.pos];
        if (b->core.mtid < 0 || b->core.mpos < 0) {
            /* Nice case, no mate to worry about*/
            /* TODO - Warning if FLAG says mate should be mapped? */
            /* Clean up funny input where mate position is given but mate reference is missing: */
            b->core.mtid = -1;
            b->core.mpos = -1;
        } else if (b->core.mtid == b->core.tid) {
            /* Nice case, same reference */
            b->core.mpos = posmap[b->core.mpos];
        } else {
            /* Nasty case, Must load alternative posmap */
            if (!fai) {
                fprintf(stderr, "[depad] ERROR: Needed reference %s sequence for mate (and no FASTA file)\n", h->target_name[b->core.mtid]);
                ret = -1;
                goto depad_end;
            }
            /* Temporarily load the other reference sequence */
            if (load_unpadded_ref(fai, h->target_name[b->core.mtid], h->target_len[b->core.mtid], &r)) {
                fprintf(stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.mtid]);
                ret = -1;
                goto depad_end;
            }
            posmap = update_posmap(posmap, r);
            b->core.mpos = posmap[b->core.mpos];
            /* Restore the reference and posmap*/
            if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) {
                fprintf(stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
                ret = -1;
                goto depad_end;
            }
            posmap = update_posmap(posmap, r);
        }
        /* Most reads will have been moved so safest to always recalculate the BIN value */
        b->core.bin = bam_reg2bin(b->core.pos, bam_endpos(b));

    next_seq:
        /* A failed write must not be silently ignored */
        if (sam_write1(out, h, b) < 0) {
            fprintf(stderr, "[depad] ERROR: Failed to write alignment\n");
            ret = -1;
            goto depad_end;
        }
    }
    if (read_ret < -1) {
        fprintf(stderr, "[depad] truncated file.\n");
        ret = 1;
    }

depad_end:
    /* Single exit point: every error path above lands here so nothing leaks */
    free(r.s); free(q.s); free(posmap); free(cigar2);
    bam_destroy1(b);
    return ret;
}