// Returns 0 on success, -1 on failure. static int sync_mq_mc(bam1_t* src, bam1_t* dest) { if ( (src->core.flag & BAM_FUNMAP) == 0 ) { // If mapped // Copy Mate Mapping Quality uint32_t mq = src->core.qual; uint8_t* data; if ((data = bam_aux_get(dest,"MQ")) != NULL) { bam_aux_del(dest, data); } bam_aux_append(dest, "MQ", 'i', sizeof(uint32_t), (uint8_t*)&mq); } // Copy mate cigar if either read is mapped if ( (src->core.flag & BAM_FUNMAP) == 0 || (dest->core.flag & BAM_FUNMAP) == 0 ) { uint8_t* data_mc; if ((data_mc = bam_aux_get(dest,"MC")) != NULL) { bam_aux_del(dest, data_mc); } // Convert cigar to string kstring_t mc = { 0, 0, NULL }; if (bam_format_cigar(src, &mc) < 0) return -1; bam_aux_append(dest, "MC", 'Z', ks_len(&mc)+1, (uint8_t*)ks_str(&mc)); free(mc.s); } return 0; }
/*
 * Build the "ct" tag for two records assumed to belong to the same template
 * and attach it to whichever record has the smaller position.
 *
 * The text written into str is, for the leftmost record: segment index
 * ('1' if BAM_FREAD1 is set, otherwise '2'), strand ('R' if BAM_FREVERSE is
 * set, otherwise 'F') and its CIGAR operations; then the distance from the
 * end of the leftmost record to the start of the other one followed by 'T';
 * then the same segment/strand/CIGAR triple for the other record.
 *
 * str is always reset first.  When the records are on different references,
 * have no reference, have a negative position, or either is unmapped, the
 * function returns with str empty and no tag is touched.  Otherwise any
 * existing "ct" tag is removed from both records before the new one is
 * appended to the leftmost record.
 */
static void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
{
    bam1_t *left, *right;
    uint32_t *ops;
    uint8_t *old_tag;
    int k, left_end;

    str->l = 0;

    // coordinateless, unmapped, or not on the same chr; skip
    if (b1->core.tid != b2->core.tid || b1->core.tid < 0)
        return;
    if (b1->core.pos < 0 || b2->core.pos < 0)
        return;
    if ((b1->core.flag & BAM_FUNMAP) || (b2->core.flag & BAM_FUNMAP))
        return;

    // order the pair so that `left` has the smaller coordinate
    if (b1->core.pos > b2->core.pos) {
        left = b2;
        right = b1;
    } else {
        left = b1;
        right = b2;
    }

    // leftmost record: segment index, strand, cigar
    kputc((left->core.flag & BAM_FREAD1) ? '1' : '2', str);
    kputc((left->core.flag & BAM_FREVERSE) ? 'R' : 'F', str);
    ops = bam_get_cigar(left);
    for (k = 0; k < left->core.n_cigar; ++k) {
        kputw(bam_cigar_oplen(ops[k]), str);
        kputc(bam_cigar_opchr(ops[k]), str);
    }

    // gap between the end of the leftmost record and the start of the other
    left_end = bam_endpos(left);
    kputw(right->core.pos - left_end, str);
    kputc('T', str);

    // other record: segment index, strand, cigar
    kputc((right->core.flag & BAM_FREAD1) ? '1' : '2', str);
    kputc((right->core.flag & BAM_FREVERSE) ? 'R' : 'F', str);
    ops = bam_get_cigar(right);
    for (k = 0; k < right->core.n_cigar; ++k) {
        kputw(bam_cigar_oplen(ops[k]), str);
        kputc(bam_cigar_opchr(ops[k]), str);
    }

    // drop stale tags from both records, then tag only the leftmost one
    old_tag = bam_aux_get(left, "ct");
    if (old_tag != NULL)
        bam_aux_del(left, old_tag);
    old_tag = bam_aux_get(right, "ct");
    if (old_tag != NULL)
        bam_aux_del(right, old_tag);

    bam_aux_append(left, "ct", 'Z', str->l+1, (uint8_t*)str->s);
}
/*
 * Test helper: update the array tag target_id on aln via
 * bam_aux_update_array(), then verify the stored values element by element
 * and, optionally, that a following tag is still intact.
 *
 *   aln        record to modify
 *   target_id  two-character tag to update
 *   type       array element type code ('c','C','s','S','i','I','f')
 *   nitems     number of elements in data
 *   data       array of nitems elements of the C type matching `type`
 *   next_id    tag expected to remain unchanged; checks are skipped when its
 *              first character is NUL
 *   next_val   expected integer value of next_id
 *   next_type  expected type byte of next_id
 *
 * Returns 0 when every check passes, -1 (after calling fail()) otherwise.
 *
 * NOTE: CHECK_ARRAY_VALS is a macro defined elsewhere; the locals `p`,
 * `data`, `nitems` and `target_id` are presumably referenced by it by name,
 * so they must not be renamed.
 */
static int test_update_array(bam1_t *aln, const char target_id[2],
                             uint8_t type, uint32_t nitems, void *data,
                             const char next_id[2], int64_t next_val,
                             char next_type)
{
    uint8_t *p;

    // Try updating target
    if (bam_aux_update_array(aln, target_id, type, nitems, data) < 0) {
        // "%.2s" (precision 2) prints the two tag characters; the previous
        // "%2.s" meant width 2 / precision 0 and printed only blanks.
        fail("update %.2s tag", target_id);
        return -1;
    }

    // Check values
    p = bam_aux_get(aln, target_id);
    if (!p) {
        fail("find %.2s tag", target_id);
        return -1;
    }
    switch (type) {
        case 'c':
            CHECK_ARRAY_VALS(int8_t, bam_auxB2i, PRId64, PRId8); break;
        case 'C':
            CHECK_ARRAY_VALS(uint8_t, bam_auxB2i, PRId64, PRIu8); break;
        case 's':
            CHECK_ARRAY_VALS(int16_t, bam_auxB2i, PRId64, PRId16); break;
        case 'S':
            CHECK_ARRAY_VALS(uint16_t, bam_auxB2i, PRId64, PRIu16); break;
        case 'i':
            CHECK_ARRAY_VALS(int32_t, bam_auxB2i, PRId64, PRId32); break;
        case 'I':
            CHECK_ARRAY_VALS(uint32_t, bam_auxB2i, PRId64, PRIu32); break;
        case 'f':
            CHECK_ARRAY_VALS(float, bam_auxB2f, "e", "e"); break;
    }

    // If given, check that the next tag hasn't been clobbered by the
    // update above.
    if (!*next_id) return 0;
    p = bam_aux_get(aln, next_id);
    if (!p) {
        fail("find %.2s tag after updating %.2s", next_id, target_id);
        return -1;
    }
    if (*p != next_type || bam_aux2i(p) != next_val) {
        fail("after updating %.2s:"
             " %.2s field is %c:%"PRId64"; expected %c:%"PRId64,
             target_id, next_id, *p, bam_aux2i(p), next_type, next_val);
        return -1;
    }

    return 0;
}
/*
 * Read callback: fetch the next record into b that passes all configured
 * filters.
 *
 * data is cast to mplp_aux_t*.  Records are read with sam_itr_next() when an
 * iterator is set, otherwise with sam_read1().  Reading repeats until a
 * record is accepted (skip == 0) or the reader returns a negative value.
 *
 * Returns the value of the last read call; it is negative when the loop was
 * left because reading failed or ended.
 *
 * Note on control flow: `continue` inside a do/while jumps to the
 * `while (skip)` test, so every `continue` below is preceded by setting
 * skip to a non-zero value to force another read.
 */
static int mplp_func(void *data, bam1_t *b)
{
    extern int bam_realn(bam1_t *b, const char *ref);
    extern int bam_prob_realn_core(bam1_t *b, const char *ref, int ref_len, int flag);
    extern int bam_cap_mapQ(bam1_t *b, char *ref, int ref_len, int thres);
    char *ref;
    mplp_aux_t *ma = (mplp_aux_t*)data;
    int ret, skip = 0, ref_len;
    do {
        int has_ref;
        ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b);
        if (ret < 0) break;
        // The 'B' cigar operation is not part of the specification, considering as obsolete.
        //  bam_remove_B(b);
        if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) { // exclude unmapped reads
            skip = 1;
            continue;
        }
        // When rflag_require is non-zero, keep only reads sharing at least one bit with it.
        if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; }
        // When rflag_filter is non-zero, drop reads sharing any bit with it.
        if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; }
        if (ma->conf->bed) { // test overlap
            skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_endpos(b));
            if (skip) continue;
        }
        if (ma->conf->rghash) { // exclude read groups
            // A read is skipped when it has an RG tag and the lookup of that
            // value in rghash returns 0 (presumably "found" - confirm against
            // khash_str2int_get).
            uint8_t *rg = bam_aux_get(b, "RG");
            skip = (rg && khash_str2int_get(ma->conf->rghash, (const char*)(rg+1), NULL)==0);
            if (skip) continue;
        }
        if (ma->conf->flag & MPLP_ILLUMINA13) {
            // Shift every base quality down by 31, clamping at 0.
            int i;
            uint8_t *qual = bam_get_qual(b);
            for (i = 0; i < b->core.l_qseq; ++i)
                qual[i] = qual[i] > 31? qual[i] - 31 : 0;
        }

        if (ma->conf->fai && b->core.tid >= 0) {
            has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len);
            if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence
                fprintf(stderr,"[%s] Skipping because %d is outside of %d [ref:%d]\n",
                        __func__, b->core.pos, ref_len, b->core.tid);
                skip = 1;
                continue;
            }
        } else {
            has_ref = 0;
        }

        // From here on the read is accepted unless one of the checks below rejects it.
        // ref and ref_len are only read when has_ref is non-zero.
        skip = 0;
        // The last argument is 7 when MPLP_REDO_BAQ is set and 3 otherwise.
        if (has_ref && (ma->conf->flag&MPLP_REALN)) bam_prob_realn_core(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
        if (has_ref && ma->conf->capQ_thres > 10) {
            // A negative result rejects the read; otherwise it caps core.qual from above.
            int q = bam_cap_mapQ(b, ref, ref_len, ma->conf->capQ_thres);
            if (q < 0) skip = 1;
            else if (b->core.qual > q) b->core.qual = q;
        }
        if (b->core.qual < ma->conf->min_mq) skip = 1;
        // With MPLP_NO_ORPHAN, reject paired reads that are not flagged as a proper pair.
        else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&BAM_FPAIRED) && !(b->core.flag&BAM_FPROPER_PAIR)) skip = 1;
    } while (skip);
    return ret;
}
/*
 * Return the string value of the "CQ" aux tag of b as given by bam_aux2Z(),
 * or NULL when the record has no CQ tag.
 */
static char *sw_align_get_cq(bam1_t *b)
{
  uint8_t *cq_tag = bam_aux_get(b, "CQ");

  if (cq_tag != 0) {
      return bam_aux2Z(cq_tag);
  }
  // tag not present
  return NULL;
}
/*
 * Fill pPairStats from one alignment.
 *
 * Steps, in order:
 *   1. pairMode is taken from SR_GetPairMode(); SR_BAD_PAIR_MODE aborts.
 *   2. fragLen is |isize|, or -1 when the read and its mate are on different
 *      references (tid != mtid).
 *   3. The RG tag is resolved to a read-group index through pTable; that
 *      index selects a sequencing technology, which remaps pairMode through
 *      SR_SeqTechMap.
 *
 * Returns SR_OK on success; SR_ERR when the pair mode is bad, the RG tag is
 * missing, or the read group cannot be resolved.  Fields written before the
 * failing step keep their new values.
 */
SR_Status SR_LoadPairStats(SR_PairStats* pPairStats, const bam1_t* pAlignment, const SR_LibInfoTable* pTable)
{
    static const char tagRG[2] = {'R', 'G'};

    pPairStats->pairMode = SR_GetPairMode(pAlignment);
    if (pPairStats->pairMode == SR_BAD_PAIR_MODE)
        return SR_ERR;

    if (pAlignment->core.tid != pAlignment->core.mtid)
        pPairStats->fragLen = -1;
    else
        pPairStats->fragLen = abs(pAlignment->core.isize);

    uint8_t* rgTag = bam_aux_get(pAlignment, tagRG);
    if (rgTag == NULL)
        return SR_ERR;

    const char* rgName = bam_aux2Z(rgTag);
    SR_Status lookup = SR_LibInfoTableGetRGIndex(&(pPairStats->readGrpID), pTable, rgName);
    if (lookup != SR_OK)
        return SR_ERR;

    SR_SeqTech tech = pTable->pSeqTech[pPairStats->readGrpID];
    pPairStats->pairMode = SR_SeqTechMap[tech][pPairStats->pairMode];

    return SR_OK;
}
/*
 * Regroup per-file pileup entries into per-sample buckets in m.
 *
 *   m          destination; m->n_plp is zeroed, m->plp[id] grows as needed
 *   sm, buf    passed through to bam_smpl_rg2smid()
 *   n          number of input files
 *   fn         file names, one per input file
 *   n_plp      number of pileup entries for each file
 *   plp        pileup entries for each file
 *   ignore_rg  when non-zero, the RG tag is not consulted
 *
 * For each entry the sample id is looked up from its RG tag (unless
 * ignore_rg), falling back to a lookup with a null read group.  An id
 * outside [0, m->n) is fatal, as is failure to grow a bucket: a message is
 * written to stderr and the process exits with status 1.
 */
static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf, int n, char *const*fn, int *n_plp, const bam_pileup1_t **plp, int ignore_rg)
{
    int i, j;
    memset(m->n_plp, 0, m->n * sizeof(int));
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n_plp[i]; ++j) {
            const bam_pileup1_t *p = plp[i] + j;
            uint8_t *q;
            int id = -1;
            q = ignore_rg? 0 : bam_aux_get(p->b, "RG");
            if (q) id = bam_smpl_rg2smid(sm, fn[i], (char*)q+1, buf);
            if (id < 0) id = bam_smpl_rg2smid(sm, fn[i], 0, buf);
            if (id < 0 || id >= m->n) {
                assert(q); // otherwise a bug
                // With NDEBUG the assert vanishes, so never format q+1 from a
                // null pointer.
                fprintf(stderr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, q? (char*)q+1 : "(none)", fn[i]);
                exit(1);
            }
            if (m->n_plp[id] == m->m_plp[id]) {
                // Bucket full: double its capacity (8 entries initially).
                // Keep the old pointer until realloc is known to have worked.
                int new_cap = m->m_plp[id]? m->m_plp[id]<<1 : 8;
                bam_pileup1_t *grown = realloc(m->plp[id], sizeof(bam_pileup1_t) * new_cap);
                if (grown == NULL) {
                    fprintf(stderr, "[%s] out of memory growing pileup buffer\n", __func__);
                    exit(1);
                }
                m->plp[id] = grown;
                m->m_plp[id] = new_cap;
            }
            m->plp[id][m->n_plp[id]++] = *p;
        }
    }
}
/// True only when the record carries an "NH" aux tag whose integer value is 1.
/// A record without NH is reported as not unique.
bool isMappedUnique() const
{
    assert(m_dataPtr);

    uint8_t *nhTag = bam_aux_get(m_dataPtr.get(), "NH");
    return nhTag != NULL && bam_aux2i(nhTag) == 1;
}
/// Integer value of the "NM" aux tag, or NO_COLOR_MM when the tag is absent.
int32_t getMismatches() const
{
    assert(m_dataPtr);

    uint8_t *nmTag = bam_aux_get(m_dataPtr.get(), "NM");
    if (nmTag == NULL) {
        return NO_COLOR_MM;
    }
    return bam_aux2i(nmTag);
}
// FIXME: we should also check the LB tag associated with each alignment const char *bam_get_library(bam_header_t *h, const bam1_t *b) { const uint8_t *rg; if (h->dict == 0) h->dict = sam_header_parse2(h->text); if (h->rg2lib == 0) h->rg2lib = sam_header2tbl(h->dict, "RG", "ID", "LB"); rg = bam_aux_get(b, "RG"); return (rg == 0)? 0 : sam_tbl_get(h->rg2lib, (const char*)(rg + 1)); }
/// String value of the "CQ" aux tag, or an empty string when the tag is absent.
std::string getColorQvsStr() const
{
    assert(m_dataPtr);

    uint8_t *cqTag = bam_aux_get(m_dataPtr.get(), "CQ");
    if (cqTag == NULL) {
        return std::string();
    }
    return bam_aux2Z(cqTag);
}
/// Integer value of the "NH" aux tag, or NO_NH when the tag is absent.
int32_t getReportedAlignments() const
{
    assert(m_dataPtr);

    uint8_t *nhTag = bam_aux_get(m_dataPtr.get(), "NH");
    if (nhTag == NULL) {
        return NO_NH;
    }
    return bam_aux2i(nhTag);
}
/// Integer value of the "XA" aux tag, or INDEL_NO_AMBIGUITY when the tag is absent.
int getIndelAmbiguity() const
{
    assert(m_dataPtr);

    uint8_t *xaTag = bam_aux_get(m_dataPtr.get(), "XA");
    if (xaTag == NULL) {
        return INDEL_NO_AMBIGUITY;
    }
    return bam_aux2i(xaTag);
}
/*
 * Return the integer value of the "ZF" aux tag of sam->b,
 * or -1 when the record has no ZF tag.
 */
int32_t
tmap_sam_get_fo_start_idx(tmap_sam_t *sam)
{
  uint8_t *zf = bam_aux_get(sam->b, "ZF");

  if(NULL == zf) {
      return -1;
  }
  return bam_aux2i(zf);
}
int32_t getMapQual() const // comes from SM tag, applies to this read if this read has a mate. { assert(m_dataPtr); //assert(shouldHaveMate()); // This will be controversial! uint8_t *mq = bam_aux_get(m_dataPtr.get(), "SM"); if (mq != NULL) return bam_aux2i(mq); return NO_MAP_QUAL; }
/**
 * Convert a BAM record into an Alignment.
 *
 * Copies the decoded base sequence and the raw quality bytes, derives the
 * read name from the query name plus "/1" and/or "/2" according to the
 * BAM_FREAD1 / BAM_FREAD2 flags, and sets the secondary flag.
 *
 * When rg_sample is non-empty and the record has an RG tag, the sample name
 * is looked up with rg_sample[rg]; note that operator[] inserts an empty
 * entry for an unknown read group.  Sample name and read group are set on
 * the result only when that lookup yields a non-empty name.
 *
 * Records without an RG tag are handled: no lookup is made and neither the
 * sample name nor the read group is set.
 */
Alignment bam_to_alignment(const bam1_t *b, map<string, string>& rg_sample) {

    Alignment alignment;

    // get the sequence and qual
    int32_t lqseq = b->core.l_qseq;
    string sequence; sequence.resize(lqseq);

    uint8_t* qualptr = bam_get_qual(b);
    string quality;//(lqseq, 0);
    quality.assign((char*)qualptr, lqseq);

    // process the sequence into chars
    uint8_t* seqptr = bam_get_seq(b);
    for (int i = 0; i < lqseq; ++i) {
        sequence[i] = "=ACMGRSVTWYHKDBN"[bam_seqi(seqptr, i)];
    }

    // get the read group and sample name; bam_aux_get() yields NULL when the
    // record has no RG tag, so only step past the type byte when it exists
    uint8_t *rgptr = bam_aux_get(b, "RG");
    const char* rg = (rgptr != NULL) ? (const char*) (rgptr+1) : NULL;
    string sname;
    if (rg != NULL && !rg_sample.empty()) {
        sname = rg_sample[string(rg)];
    }

    // Now name the read after the scaffold
    string read_name = bam_get_qname(b);

    // Decide if we are a first read (/1) or second (last) read (/2)
    if(b->core.flag & BAM_FREAD1) {
        read_name += "/1";
    }
    if(b->core.flag & BAM_FREAD2) {
        read_name += "/2";
    }

    // If we are marked as both first and last we get /1/2, and if we are marked
    // as neither the scaffold name comes through unchanged as the read name.
    // TODO: produce correct names for intermediate reads on >2 read scaffolds.

    // add features to the alignment
    alignment.set_name(read_name);
    alignment.set_sequence(sequence);
    alignment.set_quality(quality);

    // TODO: htslib doesn't wrap this flag for some reason.
    alignment.set_is_secondary(b->core.flag & BAM_FSECONDARY);
    // a non-empty sname implies rg is non-null (see the lookup above)
    if (sname.size()) {
        alignment.set_sample_name(sname);
        alignment.set_read_group(rg);
    }

    return alignment;
}
/*
 * Return the integer value of the "ZA" aux tag of sam->b,
 * or -1 when the record has no ZA tag.
 * tmap_bug() is invoked when sam->b is NULL.
 */
int32_t
tmap_sam_get_za(tmap_sam_t *sam)
{
  uint8_t *za;

  if(NULL == sam->b) tmap_bug();

  za = bam_aux_get(sam->b, "ZA");
  if(NULL == za) {
      return -1;
  }
  return bam_aux2i(za);
}
/// String value of the "RG" aux tag, or an empty string when the tag is absent.
std::string getReadGroupId() const
{
    assert(m_dataPtr);

    uint8_t *rgTag = bam_aux_get(m_dataPtr.get(), "RG");
    if (!rgTag) {
        return std::string();
    }
    return bam_aux2Z(rgTag);
}
/*
 * Test helper: set the integer tag target_id on aln to target_val with
 * bam_aux_update_int(), then verify that
 *   - the tag exists with type byte expected_type and value target_val, and
 *   - when next_id is given (first character not NUL), the tag next_id still
 *     has type byte next_type and value next_val.
 *
 * Returns 0 when every check passes, -1 (after calling fail()) otherwise.
 */
static int test_update_int(bam1_t *aln,
                           const char target_id[2], int64_t target_val,
                           char expected_type,
                           const char next_id[2], int64_t next_val,
                           char next_type)
{
    uint8_t *target_tag;
    uint8_t *next_tag;

    // Perform the update under test
    if (bam_aux_update_int(aln, target_id, target_val) < 0) {
        fail("update %.2s tag", target_id);
        return -1;
    }

    // The tag must now exist ...
    target_tag = bam_aux_get(aln, target_id);
    if (target_tag == NULL) {
        fail("find %.2s tag", target_id);
        return -1;
    }

    // ... with the expected type and value
    if (*target_tag != expected_type || bam_aux2i(target_tag) != target_val) {
        fail("%.2s field is %c:%"PRId64"; expected %c:%"PRId64,
             target_id, *target_tag, bam_aux2i(target_tag),
             expected_type, target_val);
        return -1;
    }

    // Nothing more to verify when no neighbouring tag was named
    if (*next_id == '\0')
        return 0;

    // The neighbouring tag must not have been clobbered by the update
    next_tag = bam_aux_get(aln, next_id);
    if (next_tag == NULL) {
        fail("find %.2s tag after updating %.2s", next_id, target_id);
        return -1;
    }
    if (*next_tag != next_type || bam_aux2i(next_tag) != next_val) {
        fail("after updating %.2s to %"PRId64":"
             " %.2s field is %c:%"PRId64"; expected %c:%"PRId64,
             target_id, target_val, next_id, *next_tag, bam_aux2i(next_tag),
             next_type, next_val);
        return -1;
    }

    return 0;
}
/*
 * Test helper: look up an aux field and verify its type byte.
 *
 * Returns the pointer from bam_aux_get() when the field exists and its first
 * byte equals `type`.  Otherwise reports the problem through fail() and
 * returns NULL.
 */
uint8_t *check_bam_aux_get(const bam1_t *aln, const char *tag, char type)
{
    uint8_t *field = bam_aux_get(aln, tag);

    if (field == NULL) {
        fail("can't find %s field\n", tag);
        return NULL;
    }
    if (*field != type) {
        fail("%s field of type '%c', expected '%c'\n", tag, *field, type);
        return NULL;
    }
    return field;
}
/*
 * Add the read group state->rg_id to file_read as an "RG" string tag, but
 * only when the record does not already carry an RG tag.  Records that
 * already have one are left untouched.
 *
 * The id is handed to bam_aux_append() directly (length includes the
 * terminating NUL); no temporary copy is made, which removes an unchecked
 * strdup() whose NULL result would have been passed on as tag data.
 */
static void orphan_only_func(const state_t* state, bam1_t* file_read)
{
    // If the old exists don't do anything
    if (bam_aux_get(file_read, "RG") != NULL) {
        return;
    }

    int len = strlen(state->rg_id)+1;
    bam_aux_append(file_read, "RG", 'Z', len, (uint8_t*)state->rg_id);
}
/*
 * Return the string value of the "RG" aux tag of sam->b as given by
 * bam_aux2Z(), or NULL when the record has no RG tag.
 */
char*
tmap_sam_get_rg_id(tmap_sam_t *sam)
{
  uint8_t *rg = bam_aux_get(sam->b, "RG");

  if(NULL != rg) {
      return bam_aux2Z(rg);
  }
  return NULL;
}
// Returns 0 to indicate read should be output 1 otherwise static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settings) { if (settings->remove_B) bam_remove_B(b); if (settings->min_qlen > 0) { int k, qlen = 0; uint32_t *cigar = bam_get_cigar(b); for (k = 0; k < b->core.n_cigar; ++k) if ((bam_cigar_type(bam_cigar_op(cigar[k]))&1) || bam_cigar_op(cigar[k]) == BAM_CHARD_CLIP) qlen += bam_cigar_oplen(cigar[k]); if (qlen < settings->min_qlen) return 1; } if (b->core.qual < settings->min_mapQ || ((b->core.flag & settings->flag_on) != settings->flag_on) || (b->core.flag & settings->flag_off)) return 1; if (settings->bed && (b->core.tid < 0 || !bed_overlap(settings->bed, h->target_name[b->core.tid], b->core.pos, bam_endpos(b)))) return 1; if (settings->subsam_frac > 0.) { uint32_t k = __ac_Wang_hash(__ac_X31_hash_string(bam_get_qname(b)) ^ settings->subsam_seed); if ((double)(k&0xffffff) / 0x1000000 >= settings->subsam_frac) return 1; } if (settings->rghash) { uint8_t *s = bam_aux_get(b, "RG"); if (s) { khint_t k = kh_get(rg, settings->rghash, (char*)(s + 1)); if (k == kh_end(settings->rghash)) return 1; } } if (settings->library) { const char *p = bam_get_library((bam_hdr_t*)h, b); if (!p || strcmp(p, settings->library) != 0) return 1; } if (settings->remove_aux_len) { size_t i; for (i = 0; i < settings->remove_aux_len; ++i) { uint8_t *s = bam_aux_get(b, settings->remove_aux[i]); if (s) { bam_aux_del(b, s); } } } return 0; }
/// The "CQ" aux tag converted with asciiToQvs(), or a default-constructed
/// QualityValueArray when the tag is absent.
QualityValueArray getColorQvs() const
{
    assert(m_dataPtr);

    uint8_t *cqTag = bam_aux_get(m_dataPtr.get(), "CQ");
    if (!cqTag) {
        return QualityValueArray();
    }

    std::string asciiQvs = bam_aux2Z(cqTag);
    return asciiToQvs(asciiQvs);
}
/*
 * Store the mapping quality of src on dest as an integer "MQ" tag.
 * Nothing happens when src is unmapped.  An existing MQ tag on dest is
 * deleted before the new one is appended.
 */
static void sync_mq(bam1_t* src, bam1_t* dest)
{
    uint32_t mate_qual;
    uint8_t* existing;

    if ( (src->core.flag & BAM_FUNMAP) != 0 )
        return; // src is unmapped

    mate_qual = src->core.qual;

    existing = bam_aux_get(dest, "MQ");
    if (existing != NULL)
        bam_aux_del(dest, existing);

    bam_aux_append(dest, "MQ", 'i', sizeof(uint32_t), (uint8_t*)&mate_qual);
}
/*
 * Resolve the sample index for a record read from file number bam_id.
 *
 * Resolution order:
 *   1. the file's default_idx, when it is non-negative;
 *   2. the file's rg2idx entry for the record's RG value ("?" is used as
 *      the key when the record has no RG tag);
 *   3. the file's rg2idx entry for "?".
 *
 * Returns the sample index, or -1 when none of the lookups succeeds.
 */
int bam_smpl_get_sample_id(bam_smpl_t *bsmpl, int bam_id, bam1_t *bam_rec)
{
    file_t *file = &bsmpl->files[bam_id];
    if ( file->default_idx >= 0 )
        return file->default_idx;

    // step over the leading type byte when the tag is present
    char *rg_key = (char*) bam_aux_get(bam_rec, "RG");
    if ( rg_key )
        rg_key = rg_key + 1;
    else
        rg_key = "?";

    int sample_idx;
    if ( khash_str2int_get(file->rg2idx, rg_key, &sample_idx)==0 )
        return sample_idx;
    if ( khash_str2int_get(file->rg2idx, "?", &sample_idx)==0 )
        return sample_idx;

    return -1;
}
/*
 * Return the integer value of the "ms" aux tag of b.
 * When the tag is missing, an error is printed to stderr and -1 is returned.
 */
static int64_t get_mate_score(bam1_t *b) {
    uint8_t *ms_tag = bam_aux_get(b, "ms");
    int64_t mate_score;

    if (!ms_tag) {
        fprintf(stderr, "[markdup] error: no ms score tag.\n");
        return -1;
    }

    mate_score = bam_aux2i(ms_tag);
    return mate_score;
}
/*
 * Set the read group of file_read to state->rg_id: any existing "RG" tag is
 * deleted and a new "RG" string tag is appended.
 *
 * The id is handed to bam_aux_append() directly (length includes the
 * terminating NUL); no temporary copy is made, which removes an unchecked
 * strdup() whose NULL result would have been passed on as tag data.
 */
static void overwrite_all_func(const state_t* state, bam1_t* file_read)
{
    int len = strlen(state->rg_id)+1;

    // If the old exists delete it
    uint8_t* old = bam_aux_get(file_read, "RG");
    if (old != NULL) {
        bam_aux_del(file_read, old);
    }

    bam_aux_append(file_read, "RG", 'Z', len, (uint8_t*)state->rg_id);
}
/*!
 @abstract     Get the color quality of the color encoding the previous and current base
 @param b      pointer to an alignment
 @param i      The i-th position, 0-based
 @return       color quality

 @discussion   Returns 0 if no color information is found, or if i lies
               outside the CQ string.  For records on the reverse strand
               (bam1_strand(b) non-zero) the position is mirrored, i.e. it is
               counted from the end of the CQ string.
 */
char bam_aux_getCQi(bam1_t *b, int i)
{
  uint8_t *c = bam_aux_get(b, "CQ");
  char *cq = NULL;
  size_t len;

  // return 0 if the tag was not found
  if(0 == c) return 0;

  cq = bam_aux2Z(c);
  if(NULL == cq) return 0;
  len = strlen(cq);

  // reject positions outside the string before indexing; previously these
  // read out of bounds (in-range positions are unaffected)
  if(i < 0 || (size_t)i >= len) return 0;

  // adjust for strandedness
  if(bam1_strand(b)) i = (int)len - 1 - i;

  return cq[i];
}
/*
 * Fetch the "FZ" aux tag of sam->b as an array.
 *
 * *flowgram is set to NULL first.  When the record has an FZ tag,
 * *flowgram receives the result of bam_auxB2S() and the return value is the
 * length that call stores; otherwise *flowgram stays NULL and -1 is
 * returned.
 */
int32_t
tmap_sam_get_flowgram(tmap_sam_t *sam, uint16_t **flowgram)
{
  int32_t n_flows = -1;
  uint8_t *fz;

  *flowgram = NULL;

  fz = bam_aux_get(sam->b, "FZ");
  if(NULL == fz) {
      return n_flows;
  }

  *flowgram = bam_auxB2S(fz, &n_flows);
  return n_flows;
}