int bamGetTargetLength(const bam1_t *bam)
/* Return the number of reference-sequence bases spanned by bam's alignment,
 * found by summing the lengths of the CIGAR ops that advance along the
 * reference.  Aborts via errAbort on a CIGAR op that is not handled here. */
{
const bam1_core_t *core = &bam->core;
unsigned int *packedOps = bam1_cigar(bam);
int refSpan = 0;
int opIx = 0;
while (opIx < core->n_cigar)
    {
    char opCode;
    int opLen = bamUnpackCigarElement(packedOps[opIx], &opCode);
    switch (opCode)
        {
        // Ops that consume reference bases:
        case 'M': // match or mismatch (gapless aligned block)
        case '=': // match
        case 'X': // mismatch
        case 'D': // deleted from query
        case 'N': // long deletion from query (intron as opposed to small del)
            refSpan += opLen;
            break;
        // Ops that leave the reference position where it is:
        case 'I': // inserted in query
        case 'S': // skipped query bases at beginning or end ("soft clipping")
        case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
        case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
            break;
        default:
            errAbort("bamGetTargetLength: unrecognized CIGAR op %c -- update me", opCode);
        }
    opIx++;
    }
return refSpan;
}
struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset,
                         boolean useStrand, char **retQSeq)
/* Convert bam's alignment into an ffAli list with one node per gapless
 * aligned CIGAR block (M, = or X).
 * Needle pointers point into the sequence returned by
 * bamGetQuerySequence(bam, useStrand); if retQSeq is non-NULL it is set to
 * that sequence.  Haystack pointers point into target->dna: target position
 * counting starts at targetOffset, and targetOffset is subtracted again when
 * the pointers are formed.
 * Side effect: when useStrand is set and bamIsRc(bam) is true, target->dna is
 * reverse-complemented in place before the list is built.
 * Aborts via errAbort on a CIGAR op that is not handled here.
 * (Adapted from psl.c's pslToFfAli.) */
{
const bam1_core_t *core = &bam->core;
boolean isRc = useStrand && bamIsRc(bam);
DNA *needle = (DNA *)bamGetQuerySequence(bam, useStrand);
if (retQSeq)
    *retQSeq = needle;
if (isRc)
    reverseComplement(target->dna, target->size);
DNA *haystack = target->dna;
unsigned int *packedOps = bam1_cigar(bam);
struct ffAli *blockList = NULL;
int tPos = targetOffset;
int qPos = 0;
int opCount = core->n_cigar;
int step;
for (step = 0;  step < opCount;  step++)
    {
    // For a reverse-strand alignment the CIGAR ops are visited last-to-first,
    // while the sequence offsets still count upward.
    int opIx = isRc ? (opCount - 1 - step) : step;
    char opCode;
    int opLen = bamUnpackCigarElement(packedOps[opIx], &opCode);
    switch (opCode)
        {
        case 'M': // match or mismatch (gapless aligned block)
        case '=': // match
        case 'X': // mismatch
            {
            struct ffAli *block;
            AllocVar(block);
            // Push onto the front; right links are filled in after the loop.
            block->left = blockList;
            blockList = block;
            block->nStart = needle + qPos;
            block->nEnd = block->nStart + opLen;
            block->hStart = haystack + (tPos - targetOffset);
            block->hEnd = block->hStart + opLen;
            tPos += opLen;
            qPos += opLen;
            break;
            }
        case 'I': // inserted in query
        case 'S': // skipped query bases at beginning or end ("soft clipping")
            qPos += opLen;
            break;
        case 'D': // deleted from query
        case 'N': // long deletion from query (intron as opposed to small del)
            tPos += opLen;
            break;
        case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
        case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
            break;
        default:
            errAbort("bamToFfAli: unrecognized CIGAR op %c -- update me", opCode);
        }
    }
blockList = ffMakeRightLinks(blockList);
ffCountGoodEnds(blockList);
return blockList;
}
void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen) /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at * the beginning of the query sequence and quality; likewise for retHigh at end. * For convenience, retClippedQLen is the original query length minus soft clipping * (and the length of the query sequence that will be returned). */ { unsigned int *cigarPacked = bam1_cigar(bam); const bam1_core_t *core = &bam->core; char op; int n = bamUnpackCigarElement(cigarPacked[0], &op); int low = (op == 'S') ? n : 0; n = bamUnpackCigarElement(cigarPacked[core->n_cigar-1], &op); int high = (op == 'S') ? n : 0; if (retLow != NULL) *retLow = low; if (retHigh != NULL) *retHigh = high; if (retClippedQLen != NULL) *retClippedQLen = (core->l_qseq - low - high); }
void bamUnpackCigar(const bam1_t *bam, struct dyString *dyCigar)
/* Append bam's CIGAR to dyCigar in text form: for each packed CIGAR element,
 * its length in decimal followed by its single-character op. */
{
const bam1_core_t *core = &bam->core;
unsigned int *packedOp = bam1_cigar(bam);
int remaining;
for (remaining = core->n_cigar;  remaining > 0;  remaining--, packedOp++)
    {
    char opCode;
    int opLen = bamUnpackCigarElement(*packedOp, &opCode);
    dyStringPrintf(dyCigar, "%d", opLen);
    dyStringAppendC(dyCigar, opCode);
    }
}
static int countBam(const bam1_t *bam, void *data) /* bam_fetch() calls this on each bam alignment retrieved. */ { struct bamWigTrackData *btd = (struct bamWigTrackData *)data; const bam1_core_t *core = &bam->core; int tLength=0, tPos = core->pos, qPos = 0; unsigned int *cigar = bam1_cigar(bam); int i; double scale = btd->scale; for (i = 0; i < core->n_cigar; i++) { char op; int n = bamUnpackCigarElement(cigar[i], &op); switch (op) { case 'X': // mismatch (gapless aligned block) case '=': // match (gapless aligned block) case 'M': // match or mismatch (gapless aligned block) { int start = (int)(scale * (tPos - winStart)); int end = (int)(scale * ((tPos + n) - winStart)); for(i=start; i < end; i++) btd->preDraw[i + btd->preDrawZero].count++; tPos = tPos + n; qPos = qPos + n; tLength += n; break; } case 'I': // inserted in query qPos += n; break; case 'D': // deleted from query case 'N': // long deletion from query (intron as opposed to small del) tPos += n; tLength += n; break; case 'S': // skipped query bases at beginning or end ("soft clipping") case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") case 'P': // P="silent deletion from padded reference sequence" -- ignore these. break; default: errAbort("countBam: unrecognized CIGAR op %c -- update me", op); } } return 0; }
struct simpleFeature *sfFromNumericCigar(const bam1_t *bam, int *retLength) /* Translate BAM's numeric CIGAR encoding into a list of simpleFeatures, * and tally up length on reference sequence while we're at it. */ { const bam1_core_t *core = &bam->core; struct simpleFeature *sf, *sfList = NULL; int tLength=0, tPos = core->pos, qPos = 0; unsigned int *cigar = bam1_cigar(bam); int i; for (i = 0; i < core->n_cigar; i++) { char op; int n = bamUnpackCigarElement(cigar[i], &op); switch (op) { case 'X': // mismatch (gapless aligned block) case '=': // match (gapless aligned block) case 'M': // match or mismatch (gapless aligned block) AllocVar(sf); sf->start = tPos; sf->qStart = qPos; tPos = sf->end = tPos + n; qPos = sf->qEnd = qPos + n; slAddHead(&sfList, sf); tLength += n; break; case 'I': // inserted in query qPos += n; break; case 'D': // deleted from query case 'N': // long deletion from query (intron as opposed to small del) tPos += n; tLength += n; break; case 'S': // skipped query bases at beginning or end ("soft clipping") case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") case 'P': // P="silent deletion from padded reference sequence" -- ignore these. break; default: errAbort("sfFromNumericCigar: unrecognized CIGAR op %c -- update me", op); } } if (retLength != NULL) *retLength = tLength; slReverse(&sfList); return sfList; }
void bamShowCigarEnglish(const bam1_t *bam)
/* Print bam's CIGAR to stdout as comma-separated English phrases,
 * e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match".
 * Aborts via errAbort on a CIGAR op that is not handled here. */
{
const bam1_core_t *core = &bam->core;
unsigned int *packedOps = bam1_cigar(bam);
int opIx = 0;
while (opIx < core->n_cigar)
    {
    char opCode;
    int opLen = bamUnpackCigarElement(packedOps[opIx], &opCode);
    // Every phrase except the first is preceded by a separator.
    if (opIx != 0)
        printf(", ");
    switch (opCode)
        {
        case 'M': // match or mismatch (gapless aligned block)
            printf("%d (mis)Match", opLen);
            break;
        case '=': // match
            printf("%d Match", opLen);
            break;
        case 'X': // mismatch
            printf("%d Mismatch", opLen);
            break;
        case 'I': // inserted in query
            printf("%d Insertion", opLen);
            break;
        case 'S': // skipped query bases at beginning or end ("soft clipping")
            printf("%d Skipped", opLen);
            break;
        case 'D': // deleted from query
            printf("%d Deletion", opLen);
            break;
        case 'N': // long deletion from query (intron as opposed to small del)
            printf("%d deletioN", opLen);
            break;
        case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
            printf("%d Hard clipped query", opLen);
            break;
        case 'P': // P="silent deletion from padded reference sequence"
            printf("%d Padded / silent deletion", opLen);
            break;
        default:
            errAbort("bamShowCigarEnglish: unrecognized CIGAR op %c -- update me", opCode);
        }
    opIx++;
    }
}
static struct psl *pslFromBam(const bam1_t *bam) /* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords, * no scoring info) */ { const bam1_core_t *core = &bam->core; struct psl *psl; AllocVar(psl); boolean isRc = (core->flag & BAM_FREVERSE); psl->strand[0] = isRc ? '-' : '+'; psl->qName = cloneString(bam1_qname(bam)); psl->tName = cloneString(chromName); unsigned blockCount = 0; unsigned *blockSizes, *qStarts, *tStarts; AllocArray(blockSizes, core->n_cigar); AllocArray(qStarts, core->n_cigar); AllocArray(tStarts, core->n_cigar); int tPos = core->pos, qPos = 0, qLength = 0; unsigned int *cigar = bam1_cigar(bam); int i; for (i = 0; i < core->n_cigar; i++) { char op; int n = bamUnpackCigarElement(cigar[i], &op); switch (op) { case 'X': // mismatch (gapless aligned block) case '=': // match (gapless aligned block) case 'M': // match or mismatch (gapless aligned block) blockSizes[blockCount] = n; qStarts[blockCount] = qPos; tStarts[blockCount] = tPos; blockCount++; tPos += n; qPos += n; qLength += n; break; case 'I': // inserted in query qPos += n; qLength += n; break; case 'D': // deleted from query case 'N': // long deletion from query (intron as opposed to small del) tPos += n; break; case 'S': // skipped query bases at beginning or end ("soft clipping") qPos += n; qLength += n; break; case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") case 'P': // P="silent deletion from padded reference sequence" -- ignore these. break; default: errAbort("pslFromBam: unrecognized CIGAR op %c -- update me", op); } } if (blockCount == 0) { // sometimes BAM's have alignments with no alignment return NULL; // leaks allocated PSL. 
} psl->tSize = hChromSize(database, chromName); psl->tStart = tStarts[0]; psl->tEnd = tStarts[blockCount-1] + blockSizes[blockCount-1]; psl->qSize = qLength; psl->qStart = qStarts[0]; psl->qEnd = qStarts[blockCount-1] + blockSizes[blockCount-1]; if (isRc) reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize); psl->blockCount = blockCount; psl->blockSizes = blockSizes; psl->qStarts = qStarts; psl->tStarts = tStarts; return psl; }