static void showOverlap(const bam1_t *leftBam, const bam1_t *rightBam) /* If the two reads overlap, show how. */ { const bam1_core_t *leftCore = &(leftBam->core), *rightCore = &(rightBam->core); int leftStart = leftCore->pos, rightStart = rightCore->pos; int leftLen = bamGetTargetLength(leftBam), rightLen = bamGetTargetLength(rightBam); char *leftSeq = bamGetQuerySequence(leftBam, useStrand); char *rightSeq = bamGetQuerySequence(rightBam, useStrand); if (useStrand && bamIsRc(leftBam)) reverseComplement(leftSeq, strlen(leftSeq)); if (useStrand && bamIsRc(rightBam)) reverseComplement(rightSeq, strlen(rightSeq)); if ((rightStart > leftStart && leftStart + leftLen > rightStart) || (leftStart > rightStart && rightStart+rightLen > leftStart)) { int leftClipLow, rightClipLow; bamGetSoftClipping(leftBam, &leftClipLow, NULL, NULL); bamGetSoftClipping(rightBam, &rightClipLow, NULL, NULL); leftStart -= leftClipLow; rightStart -= rightClipLow; printf("<B>Note: End read alignments overlap:</B><BR>\n<PRE><TT>"); int i = leftStart - rightStart; while (i-- > 0) putc(' ', stdout); puts(leftSeq); i = rightStart - leftStart; while (i-- > 0) putc(' ', stdout); puts(rightSeq); puts("</TT></PRE>"); } }
static void singleBamDetails(const bam1_t *bam) /* Print out the properties of this alignment. */ { const bam1_core_t *core = &bam->core; char *itemName = bam1_qname(bam); int tLength = bamGetTargetLength(bam); int tStart = core->pos, tEnd = tStart+tLength; boolean isRc = useStrand && bamIsRc(bam); printPosOnChrom(seqName, tStart, tEnd, NULL, FALSE, itemName); if (!skipQualityScore) printf("<B>Alignment Quality: </B>%d<BR>\n", core->qual); printf("<B>CIGAR string: </B><tt>%s</tt> (", bamGetCigar(bam)); bamShowCigarEnglish(bam); printf(")<BR>\n"); printf("<B>Tags:</B>"); bamShowTags(bam); puts("<BR>"); printf("<B>Flags: </B><tt>0x%02x:</tt><BR>\n ", core->flag); bamShowFlagsEnglish(bam); puts("<BR>"); if (bamIsRc(bam)) printf("<em>Note: although the read was mapped to the reverse strand of the genome, " "the sequence and CIGAR in BAM are relative to the forward strand.</em><BR>\n"); puts("<BR>"); struct dnaSeq *genoSeq = hChromSeq(database, seqName, tStart, tEnd); char *qSeq = bamGetQuerySequence(bam, FALSE); if (isNotEmpty(qSeq) && !sameString(qSeq, "*")) { char *qSeq = NULL; struct ffAli *ffa = bamToFfAli(bam, genoSeq, tStart, useStrand, &qSeq); printf("<B>Alignment of %s to %s:%d-%d%s:</B><BR>\n", itemName, seqName, tStart+1, tEnd, (isRc ? " (reverse complemented)" : "")); ffShowSideBySide(stdout, ffa, qSeq, 0, genoSeq->dna, tStart, tLength, 0, tLength, 8, isRc, FALSE); } if (!skipQualityScore && core->l_qseq > 0) { printf("<B>Sequence quality scores:</B><BR>\n<TT><TABLE><TR>\n"); UBYTE *quals = bamGetQueryQuals(bam, useStrand); int i; for (i = 0; i < core->l_qseq; i++) { if (i > 0 && (i % 24) == 0) printf("</TR>\n<TR>"); printf("<TD>%c<BR>%d</TD>", qSeq[i], quals[i]); } printf("</TR></TABLE></TT>\n"); } }
struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset,
			 boolean useStrand, char **retQSeq)
/* Build an ffAli block list from the CIGAR of bam, with needle pointers into the
 * query sequence and haystack pointers into target->dna.  If retQSeq is non-null
 * it receives the query sequence buffer that the needle pointers refer to.
 * When useStrand is set and the read is reverse-strand, target->dna is
 * reverse-complemented in place before the blocks are built.
 * (Adapted from psl.c's pslToFfAli.) */
{
const bam1_core_t *core = &bam->core;
boolean reversed = useStrand && bamIsRc(bam);
DNA *query = (DNA *)bamGetQuerySequence(bam, useStrand);
if (retQSeq != NULL)
    *retQSeq = query;
if (reversed)
    reverseComplement(target->dna, target->size);
DNA *genome = target->dna;
unsigned int *cigar = bam1_cigar(bam);
int cigarCount = core->n_cigar;
struct ffAli *blocks = NULL;
// Offsets into the query buffer and into target->dna; both only ever count up,
// even when the CIGAR ops are visited last-to-first.
int qPos = 0, tPos = 0;
int step;
for (step = 0;  step < cigarCount;  step++)
    {
    // Reverse-strand alignments walk the CIGAR ops from the last one to the first.
    int ix = reversed ? (cigarCount - 1 - step) : step;
    char op;
    int size = bamUnpackCigarElement(cigar[ix], &op);
    if (op == 'M' || op == '=' || op == 'X')
	{
	// Gapless aligned block (match and/or mismatch): emit one ffAli element.
	struct ffAli *block;
	AllocVar(block);
	block->left = blocks;
	blocks = block;
	block->nStart = query + qPos;
	block->nEnd = block->nStart + size;
	block->hStart = genome + tPos;
	block->hEnd = block->hStart + size;
	tPos += size;
	qPos += size;
	}
    else if (op == 'I' || op == 'S')
	{
	// Inserted in query, or soft-clipped query bases: consumes query only.
	qPos += size;
	}
    else if (op == 'D' || op == 'N')
	{
	// Deleted from query, or long skip such as an intron: consumes target only.
	tPos += size;
	}
    else if (op == 'H' || op == 'P')
	{
	// Hard clipping (bases not stored in the record) and padding: nothing to do.
	}
    else
	errAbort("bamToFfAli: unrecognized CIGAR op %c -- update me", op);
    }
// The list was built through the left links only; fill in the right links.
blocks = ffMakeRightLinks(blocks);
ffCountGoodEnds(blocks);
return blocks;
}
UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand)
/* Return a newly allocated array of core.l_qseq base quality scores taken from bam.
 * If useStrand is set and the read is reverse-strand, the scores are returned in
 * reversed order.  If the first stored score is 255, every returned score is 255. */
{
int qualCount = bam->core.l_qseq;
UBYTE *result = needMem(qualCount);
UBYTE *stored = bam1_qual(bam);
boolean flip = useStrand && bamIsRc(bam);
int i;
for (i = 0;  i < qualCount;  i++)
    {
    if (stored[0] == 255)
	// NOTE(review): a leading 255 presumably marks "no qualities stored" -- confirm.
	result[i] = 255;
    else if (flip)
	result[i] = stored[qualCount - 1 - i];
    else
	result[i] = stored[i];
    }
return result;
}
void bamUnpackQuerySequence(const bam1_t *bam, boolean useStrand, char *qSeq)
/* Decode the packed nucleotide sequence of bam into qSeq as a NUL-terminated
 * string; qSeq must have room for core.l_qseq + 1 chars.  BAM stores the query
 * reverse-complemented when the alignment is on the - strand, so when useStrand
 * is set such reads are reverse-complemented again to recover the original
 * query sequence. */
{
int baseCount = bam->core.l_qseq;
uint8_t *packed = bam1_seq(bam);
int pos = 0;
while (pos < baseCount)
    {
    // Each packed code is translated to its character via bam_nt16_rev_table.
    qSeq[pos] = bam_nt16_rev_table[bam1_seqi(packed, pos)];
    pos++;
    }
qSeq[pos] = '\0';
if (!useStrand)
    return;
if (bamIsRc(bam))
    reverseComplement(qSeq, baseCount);
}