/**
 * Return the number of payload bytes that follow the type byte of one BAM
 * auxiliary field, or -1 if the type is not recognised or the payload would
 * extend past `end`.
 *
 * @param type  points at the field's type byte (the byte after the 2-char tag)
 * @param end   one past the last byte of the aux region
 */
static int64_t dump_read_aux_payload_len(const uint8_t* type, const uint8_t* end) {
    const uint8_t* payload = type + 1;
    const int64_t avail = end - payload;
    if (avail < 0) {
        return -1;
    }

    int64_t len = -1;
    switch (*type) {
        case 'A': case 'c': case 'C':
            len = 1;
            break;
        case 's': case 'S':
            len = 2;
            break;
        case 'i': case 'I': case 'f':
            len = 4;
            break;
        case 'd':
            len = 8;
            break;
        case 'Z': case 'H': {
            // NUL-terminated text; the terminator is part of the payload.
            int64_t k = 0;
            while (k < avail && payload[k] != '\0') {
                ++k;
            }
            if (k == avail) {
                return -1;  // terminator missing inside the record
            }
            len = k + 1;
            break;
        }
        case 'B': {
            // Array: 1-byte element type, 32-bit little-endian count, elements.
            if (avail < 5) {
                return -1;
            }
            int64_t elem = 0;
            switch (payload[0]) {
                case 'c': case 'C': elem = 1; break;
                case 's': case 'S': elem = 2; break;
                case 'i': case 'I': case 'f': elem = 4; break;
                default: return -1;
            }
            const uint32_t count = (uint32_t)payload[1]
                                 | ((uint32_t)payload[2] << 8)
                                 | ((uint32_t)payload[3] << 16)
                                 | ((uint32_t)payload[4] << 24);
            len = 5 + elem * (int64_t)count;
            break;
        }
        default:
            return -1;
    }
    return len <= avail ? len : -1;
}

/**
 * Debug helper: print the contents of a BAM record to stdout.
 *
 * Prints, in order: every bam1_core_t field, a hex dump of b->data (if set),
 * the query name, the decoded query sequence and the quality bytes (if
 * l_qseq is non-zero), and one line per auxiliary field of the form
 * "TG:t:" followed by the value for 'Z' (string) fields only.
 *
 * @param b  record to dump; must not be null.
 */
void dump_read(bam1_t* b) {
    printf("->core.tid:(%d)\n", b->core.tid);
    // pos/mpos/isize are widened explicitly so the format matches whether the
    // library declares them as 32-bit or 64-bit integers.
    printf("->core.pos:(%lld)\n", (long long)b->core.pos);
    printf("->core.bin:(%d)\n", b->core.bin);
    printf("->core.qual:(%d)\n", b->core.qual);
    printf("->core.l_qname:(%d)\n", b->core.l_qname);
    printf("->core.flag:(%d)\n", b->core.flag);
    printf("->core.n_cigar:(%d)\n", b->core.n_cigar);
    printf("->core.l_qseq:(%d)\n", b->core.l_qseq);
    printf("->core.mtid:(%d)\n", b->core.mtid);
    printf("->core.mpos:(%lld)\n", (long long)b->core.mpos);
    printf("->core.isize:(%lld)\n", (long long)b->core.isize);

    if (b->data) {
        printf("->data:");
        for (int i = 0; i < b->l_data; ++i) {
            printf("%x ", b->data[i]);
        }
        printf("\n");
    }

    if (b->core.l_qname) {
        printf("qname: %s\n", bam_get_qname(b));
    }

    if (b->core.l_qseq) {
        printf("qseq:");
        for (int i = 0; i < b->core.l_qseq; ++i) {
            // bam_seqi() already yields the 4-bit base code that indexes
            // seq_nt16_str; routing it through seq_nt16_table (an ASCII ->
            // code table) would turn every base into 'N'.
            printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(b), i)]);
        }
        printf("\n");

        // NOTE(review): quality bytes are written exactly as stored (no ASCII
        // offset is added), so this line is generally not printable text.
        printf("qual:");
        for (int i = 0; i < b->core.l_qseq; ++i) {
            printf("%c", bam_get_qual(b)[i]);
        }
        printf("\n");
    }

    if (bam_get_l_aux(b) > 0) {
        const uint8_t* aux = bam_get_aux(b);
        const uint8_t* const aux_end = aux + bam_get_l_aux(b);

        // Each field is: 2-byte tag, 1-byte type, type-dependent payload.
        while (aux_end - aux >= 3) {
            printf("%.2s:%c:", (const char*)aux, aux[2]);

            const int64_t len = dump_read_aux_payload_len(aux + 2, aux_end);
            if (len < 0) {
                // Unknown type or truncated field: the rest cannot be parsed.
                putc('\n', stdout);
                break;
            }
            if (aux[2] == 'Z') {
                // Print the string without its terminating NUL.
                for (int64_t k = 0; k + 1 < len; ++k) {
                    putc(aux[3 + k], stdout);
                }
            }
            putc('\n', stdout);
            aux += 3 + len;
        }
    }
    printf("\n");
}
int bam_read1_fromArray(char *bamChar, bam1_t *b) //modified from samtools bam_read1 to assign BAM record in mmemry to bam structure { bam1_core_t *c = &b->core; int32_t block_len; //, ret, i; // // uint32_t x[8]; // // if ((ret = bgzf_read(fp, &block_len, 4)) != 4) { // // if (ret == 0) return -1; // normal end-of-file // // else return -2; // truncated // // } uint32_t *x; uint32_t *bamU32=(uint32_t*) bamChar; block_len=bamU32[0]; // // if (bgzf_read(fp, x, 32) != 32) return -3; // // if (fp->is_be) { // // ed_swap_4p(&block_len); // // for (i = 0; i < 8; ++i) ed_swap_4p(x + i); // // } x=bamU32+1; c->tid = x[0]; c->pos = x[1]; c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff; c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff; c->l_qseq = x[4]; c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7]; b->l_data = block_len - 32; if (b->l_data < 0 || c->l_qseq < 0) return -4; if ((char *)bam_get_aux(b) - (char *)b->data > b->l_data) return -4; if (b->m_data < b->l_data) { b->m_data = b->l_data; kroundup32(b->m_data); b->data = (uint8_t*)realloc(b->data, b->m_data); if (!b->data) return -4; } // // if (bgzf_read(fp, b->data, b->l_data) != b->l_data) return -4; // // //b->l_aux = b->l_data - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2; // // if (fp->is_be) swap_data(c, b->l_data, b->data, 0); b->data=(uint8_t*) bamChar+4*9; return 4 + block_len; }
/**
 * Trim runs of N bases from both ends of a read, rebuilding the record's data
 * in the scratch buffer owned by `data` and swapping it into `b`.
 *
 * Steps:
 *  - count leading (n_start) and trailing (n_end) N bases;
 *  - shorten the last / first CIGAR operation by n_end / n_start, rewriting it
 *    as a soft clip, or drop it when its length equals the trimmed count;
 *  - repack the kept bases and qualities, then restore the original aux bytes;
 *  - append "NE" / "NS" integer tags holding n_end / n_start when non-zero;
 *  - if "PV" / "FA" array tags exist, replace each with the final_len elements
 *    starting at offset n_end (reverse-strand flag set) or n_start (otherwise).
 *
 * @param b     record to trim in place.
 * @param data  pointer to an opts_t; its `data` buffer is swapped with
 *              b->data, and `skip_all_ns` / `min_trimmed_len` are read.
 * @return bit flags: op->skip_all_ns is OR'd in when every base is N, and 1
 *         is OR'd in when the trimmed length is below op->min_trimmed_len.
 *
 * NOTE(review): the CIGAR edits assume the terminal operations are at least
 * as long as the N runs; a shorter operation yields a negative length passed
 * to bam_cigar_gen — confirm callers guarantee this.
 * NOTE(review): after the swap b->data is the opts_t buffer while b->m_data
 * is not updated here; verify the capacity bam_aux_append relies on.
 */
static int trim_ns(bam1_t *b, void *data) {
    int ret = 0;
    opts_t *op((opts_t *)data);

    // Snapshot everything whose address depends on n_cigar / l_qseq BEFORE
    // those core fields are edited: the bam_get_* accessors recompute offsets
    // from the core fields, while the bytes in b->data stay where they were.
    std::vector<uint8_t> aux(bam_get_aux(b), bam_get_aux(b) + bam_get_l_aux(b));
    uint8_t *const seq(bam_get_seq(b));
    uint8_t *const old_qual(bam_get_qual(b));
    uint32_t *const cigar(bam_get_cigar(b));
    const int old_len(b->core.l_qseq);
    int tmp;

    op->resize(b->l_data); // Make sure it's big enough to hold everything.
    memcpy(op->data, b->data, b->core.l_qname);

    // Count Ns at the beginning, never scanning past the last base.
    for(tmp = 0; tmp < old_len && bam_seqi(seq, tmp) == dlib::htseq::HTS_N; ++tmp);
    const int n_start(tmp);

    if(old_len > 0 && n_start == old_len) // all bases are N -- garbage read
        ret |= op->skip_all_ns;

    // Count Ns at the end, stopping at the leading run so the two runs never
    // overlap (for an all-N read n_end is therefore 0).
    for(tmp = old_len - 1; tmp >= n_start && bam_seqi(seq, tmp) == dlib::htseq::HTS_N; --tmp);
    const int n_end(old_len - 1 - tmp);

    // New read length; cannot be negative because the scans are bounded.
    const int final_len(old_len - n_end - n_start);
    if(final_len < op->min_trimmed_len) // Too short.
        ret |= 1;

    // Adjust the last CIGAR operation for the trailing trim. Records without
    // any CIGAR operation have nothing to adjust.
    if(n_end && b->core.n_cigar) {
        if((tmp = bam_cigar_oplen(cigar[b->core.n_cigar - 1]) - n_end) == 0) {
            LOG_DEBUG("Entire cigar operation is the softclip. Decrease the number of new cigar operations.\n");
            --b->core.n_cigar;
        } else {
            LOG_DEBUG("Updating second cigar operation in-place.\n");
            cigar[b->core.n_cigar - 1] = bam_cigar_gen(tmp, BAM_CSOFT_CLIP);
        }
    }

    // Adjust the first CIGAR operation for the leading trim and copy the
    // resulting CIGAR into the new buffer (<< 2: four bytes per operation).
    if(b->core.n_cigar && (tmp = bam_cigar_oplen(*cigar) - n_start) == 0) {
        memcpy(op->data + b->core.l_qname, cigar + 1, (--b->core.n_cigar) << 2);
    } else {
        if(n_start && b->core.n_cigar) *cigar = bam_cigar_gen(tmp, BAM_CSOFT_CLIP);
        memcpy(op->data + b->core.l_qname, cigar, b->core.n_cigar << 2);
    }

    // Pointer to the seq region of the new data field.
    uint8_t *opseq(op->data + b->core.l_qname + (b->core.n_cigar << 2));

    // Repack the kept bases two per byte, starting at base n_start.
    for(tmp = 0; tmp < final_len >> 1; ++tmp)
        opseq[tmp] = (bam_seqi(seq, ((tmp << 1) + n_start)) << 4) | (bam_seqi(seq, (tmp << 1) + n_start + 1));
    if(final_len & 1)
        opseq[tmp] = (bam_seqi(seq, ((tmp << 1) + n_start)) << 4);

    // Copy the kept qualities from their ORIGINAL location. The aux bytes are
    // restored from the snapshot below, so only final_len bytes are needed.
    memcpy(opseq + ((final_len + 1) >> 1), old_qual + n_start, final_len);

    // Switch data strings
    std::swap(op->data, b->data);
    b->core.l_qseq = final_len;
    if(!aux.empty())
        memcpy(bam_get_aux(b), aux.data(), aux.size());
    b->l_data = (bam_get_aux(b) - b->data) + aux.size();

    if(n_end) bam_aux_append(b, "NE", 'i', sizeof(int), (uint8_t *)&n_end);
    if(n_start) bam_aux_append(b, "NS", 'i', sizeof(int), (uint8_t *)&n_start);

    // Per-base arrays are stored in read orientation, so the number of
    // elements to skip at the front depends on the strand flag.
    const uint32_t *pvar((uint32_t *)dlib::array_tag(b, "PV"));
    tmp = b->core.flag & BAM_FREVERSE ? n_end: n_start;
    if(pvar) {
        std::vector<uint32_t>pvals(pvar + tmp, pvar + final_len + tmp);
        // pvar - 6 is passed as the start of the tag's entry for deletion.
        bam_aux_del(b, (uint8_t *)(pvar) - 6);
        dlib::bam_aux_array_append(b, "PV", 'I', sizeof(uint32_t), final_len, (uint8_t *)pvals.data());
    }
    const uint32_t *fvar((uint32_t *)dlib::array_tag(b, "FA"));
    if(fvar) {
        std::vector<uint32_t>fvals(fvar + tmp, fvar + final_len + tmp);
        bam_aux_del(b, (uint8_t *)(fvar) - 6);
        dlib::bam_aux_array_append(b, "FA", 'I', sizeof(uint32_t), final_len, (uint8_t *)fvals.data());
    }
    return ret;
}