Example #1
0
void dump_read(bam1_t* b) {
    printf("->core.tid:(%d)\n", b->core.tid);
    printf("->core.pos:(%d)\n", b->core.pos);
    printf("->core.bin:(%d)\n", b->core.bin);
    printf("->core.qual:(%d)\n", b->core.qual);
    printf("->core.l_qname:(%d)\n", b->core.l_qname);
    printf("->core.flag:(%d)\n", b->core.flag);
    printf("->core.n_cigar:(%d)\n", b->core.n_cigar);
    printf("->core.l_qseq:(%d)\n", b->core.l_qseq);
    printf("->core.mtid:(%d)\n", b->core.mtid);
    printf("->core.mpos:(%d)\n", b->core.mpos);
    printf("->core.isize:(%d)\n", b->core.isize);
    if (b->data) {
        printf("->data:");
        int i;
        for (i = 0; i < b->l_data; ++i) {
            printf("%x ", b->data[i]);
        }
        printf("\n");
    }
    if (b->core.l_qname) {
        printf("qname: %s\n",bam_get_qname(b));
    }
    if (b->core.l_qseq) {
        printf("qseq:");
        int i;
        for (i = 0; i < b->core.l_qseq; ++i) {
            printf("%c",seq_nt16_str[seq_nt16_table[bam_seqi(bam_get_seq(b),i)]]);
        }
        printf("\n");
        printf("qual:");
        for (i = 0; i < b->core.l_qseq; ++i) {
            printf("%c",bam_get_qual(b)[i]);
        }
        printf("\n");

    }

    if (bam_get_l_aux(b)) {
        int i = 0;
        uint8_t* aux = bam_get_aux(b);

        while (i < bam_get_l_aux(b)) {
            printf("%.2s:%c:",aux+i,*(aux+i+2));
            i += 2;
            switch (*(aux+i)) {
                case 'Z':
                    while (*(aux+1+i) != '\0') { putc(*(aux+1+i), stdout); ++i; }
                    break;
            }
            putc('\n',stdout);
            ++i;++i;
        }
    }
    printf("\n");
}
Example #2
0
int bam_read1_fromArray(char *bamChar, bam1_t *b) //modified from samtools bam_read1 to assign BAM record in mmemry to bam structure
{
	bam1_core_t *c = &b->core;
	int32_t block_len; //, ret, i;
// // 	uint32_t x[8];
// // 	if ((ret = bgzf_read(fp, &block_len, 4)) != 4) {
// // 		if (ret == 0) return -1; // normal end-of-file
// // 		else return -2; // truncated
// // 	}
 	uint32_t *x;

    uint32_t *bamU32=(uint32_t*) bamChar;
    block_len=bamU32[0];
    
// // 	if (bgzf_read(fp, x, 32) != 32) return -3;
// // 	if (fp->is_be) {
// // 		ed_swap_4p(&block_len);
// // 		for (i = 0; i < 8; ++i) ed_swap_4p(x + i);
// // 	}
    x=bamU32+1;
    
	c->tid = x[0]; c->pos = x[1];
	c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff;
	c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff;
	c->l_qseq = x[4];
	c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7];
	b->l_data = block_len - 32;
	if (b->l_data < 0 || c->l_qseq < 0) return -4;
	if ((char *)bam_get_aux(b) - (char *)b->data > b->l_data)
		return -4;
	if (b->m_data < b->l_data) {
		b->m_data = b->l_data;
		kroundup32(b->m_data);
		b->data = (uint8_t*)realloc(b->data, b->m_data);
		if (!b->data)
			return -4;
	}
// // 	if (bgzf_read(fp, b->data, b->l_data) != b->l_data) return -4;
// // 	//b->l_aux = b->l_data - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2;
// // 	if (fp->is_be) swap_data(c, b->l_data, b->data, 0);
    b->data=(uint8_t*) bamChar+4*9;
    
	return 4 + block_len;
}
Example #3
0
static int trim_ns(bam1_t *b, void *data) {
    int ret = 0;
    opts_t *op((opts_t *)data);
    std::vector<uint8_t> aux(bam_get_aux(b), bam_get_aux(b) + bam_get_l_aux(b));
    int tmp;
    uint8_t *const seq(bam_get_seq(b));
    uint32_t *const cigar(bam_get_cigar(b));
    //op->n_cigar = b->core.n_cigar;
    op->resize(b->l_data); // Make sure it's big enough to hold everything.
    memcpy(op->data, b->data, b->core.l_qname);

    // Get #Ns at the beginning
    for(tmp = 0; bam_seqi(seq, tmp) == dlib::htseq::HTS_N; ++tmp);
    const int n_start(tmp);

    if(tmp == b->core.l_qseq - 1) // all bases are N -- garbage read
         ret |= op->skip_all_ns;

    // Get #Ns at the end
    for(tmp = b->core.l_qseq - 1; bam_seqi(seq, tmp) == dlib::htseq::HTS_N; --tmp);
    const int n_end(b->core.l_qseq - 1 - tmp);

    // Get new length for read
    int final_len(b->core.l_qseq - n_end - n_start);
    if(final_len < 0) final_len = 0;
    if(final_len < op->min_trimmed_len) // Too short.
        ret |= 1;
    // Copy in qual and all of aux.

    if(n_end) {
        if((tmp = bam_cigar_oplen(cigar[b->core.n_cigar - 1]) - n_end) == 0) {
            LOG_DEBUG("Entire cigar operation is the softclip. Decrease the number of new cigar operations.\n");
            --b->core.n_cigar;
        } else {
            LOG_DEBUG("Updating second cigar operation in-place.\n");
            cigar[b->core.n_cigar - 1] = bam_cigar_gen(tmp, BAM_CSOFT_CLIP);
        }
    }

    // Get new n_cigar.
    if((tmp = bam_cigar_oplen(*cigar) - n_start) == 0) {
        memcpy(op->data + b->core.l_qname, cigar + 1, (--b->core.n_cigar) << 2); // << 2 for 4 bit per cigar op
    } else {
        if(n_start) *cigar = bam_cigar_gen(tmp, BAM_CSOFT_CLIP);
        memcpy(op->data + b->core.l_qname, cigar, b->core.n_cigar << 2);
    }
    uint8_t *opseq(op->data + b->core.l_qname + (b->core.n_cigar << 2)); // Pointer to the seq region of new data field.
    for(tmp = 0; tmp < final_len >> 1; ++tmp)
        opseq[tmp] = (bam_seqi(seq, ((tmp << 1) + n_start)) << 4) | (bam_seqi(seq, (tmp << 1) + n_start + 1));
    if(final_len & 1)
        opseq[tmp] = (bam_seqi(seq, ((tmp << 1) + n_start)) << 4);

    tmp = bam_get_l_aux(b);
    memcpy(opseq + ((final_len + 1) >> 1), bam_get_qual(b) + n_start, final_len + tmp);
    // Switch data strings
    std::swap(op->data, b->data);
    b->core.l_qseq = final_len;
    memcpy(bam_get_aux(b), aux.data(), aux.size());
    b->l_data = (bam_get_aux(b) - b->data) + aux.size();
    if(n_end) bam_aux_append(b, "NE", 'i', sizeof(int), (uint8_t *)&n_end);
    if(n_start) bam_aux_append(b, "NS", 'i', sizeof(int), (uint8_t *)&n_start);
    const uint32_t *pvar((uint32_t *)dlib::array_tag(b, "PV"));
    tmp = b->core.flag & BAM_FREVERSE ? n_end: n_start;
    if(pvar) {
        std::vector<uint32_t>pvals(pvar + tmp, pvar + final_len + tmp);
        bam_aux_del(b, (uint8_t *)(pvar) - 6);
        dlib::bam_aux_array_append(b, "PV", 'I', sizeof(uint32_t), final_len, (uint8_t *)pvals.data());
    }
    const uint32_t *fvar((uint32_t *)dlib::array_tag(b, "FA"));
    if(fvar) {
        std::vector<uint32_t>fvals(fvar + tmp, fvar + final_len + tmp);
        bam_aux_del(b, (uint8_t *)(fvar) - 6);
        dlib::bam_aux_array_append(b, "FA", 'I', sizeof(uint32_t), final_len, (uint8_t *)fvals.data());
    }
    return ret;
}