Exemplo n.º 1
static void showOverlap(const bam1_t *leftBam, const bam1_t *rightBam)
/* If the two reads overlap, show how.
 * Overlap is decided from each read's core->pos and bamGetTargetLength(); when the
 * reads overlap, an HTML note is printed followed by space padding that offsets one
 * read relative to the other.
 * NOTE(review): this excerpt has no braces and stops right after the padding loops, so
 * block structure is implied by indentation only and the code that prints the
 * sequences is not visible here. */
const bam1_core_t *leftCore = &(leftBam->core), *rightCore = &(rightBam->core);
int leftStart = leftCore->pos, rightStart = rightCore->pos;
int leftLen = bamGetTargetLength(leftBam), rightLen = bamGetTargetLength(rightBam);
/* useStrand is not declared in this block -- presumably a file-level setting. */
char *leftSeq = bamGetQuerySequence(leftBam, useStrand);
char *rightSeq = bamGetQuerySequence(rightBam, useStrand);
/* NOTE(review): the sequences were fetched with useStrand and are reverse-complemented
 * again here for reverse-strand reads; presumably this puts both reads back in the
 * same (forward-strand) orientation for display -- confirm. */
if (useStrand && bamIsRc(leftBam))
    reverseComplement(leftSeq, strlen(leftSeq));
if (useStrand && bamIsRc(rightBam))
    reverseComplement(rightSeq, strlen(rightSeq));
/* Overlap test: one read starts strictly after the other's start and before the
 * other's end.  Both comparisons are strict, so two reads that start at exactly the
 * same position satisfy neither branch. */
if ((rightStart > leftStart && leftStart + leftLen > rightStart) ||
    (leftStart > rightStart && rightStart+rightLen > leftStart))
    int leftClipLow, rightClipLow;
    /* Only the first output of bamGetSoftClipping is requested (the others are NULL);
     * presumably it is the number of soft-clipped bases at the low end -- confirm. */
    bamGetSoftClipping(leftBam, &leftClipLow, NULL, NULL);
    bamGetSoftClipping(rightBam, &rightClipLow, NULL, NULL);
    /* Shift each start back by its low-end clip count before computing the padding. */
    leftStart -= leftClipLow;
    rightStart -= rightClipLow;
    printf("<B>Note: End read alignments overlap:</B><BR>\n<PRE><TT>");
    /* Emit (leftStart - rightStart) spaces if that difference is positive ... */
    int i = leftStart - rightStart;
    while (i-- > 0)
	putc(' ', stdout);
    /* ... then (rightStart - leftStart) spaces if that one is positive.  At most one
     * of the two loops prints anything.  NOTE(review): whatever is printed between or
     * after these loops is not visible in this excerpt. */
    i = rightStart - leftStart;
    while (i-- > 0)
	putc(' ', stdout);
Exemplo n.º 2
static void singleBamDetails(const bam1_t *bam)
/* Print out the properties of this alignment.
 * Writes HTML to stdout: position, alignment quality, CIGAR string, flags, a
 * side-by-side alignment against the genomic sequence and a table of per-base
 * quality scores.
 * NOTE(review): this excerpt has no braces and several statements appear to be cut
 * off or missing (see notes below); block structure is implied by indentation only.
 * seqName, database, useStrand and skipQualityScore are not declared in this block --
 * presumably file-level variables. */
const bam1_core_t *core = &bam->core;
char *itemName = bam1_qname(bam);
int tLength = bamGetTargetLength(bam);
int tStart = core->pos, tEnd = tStart+tLength;
boolean isRc = useStrand && bamIsRc(bam);
printPosOnChrom(seqName, tStart, tEnd, NULL, FALSE, itemName);
if (!skipQualityScore)
    printf("<B>Alignment Quality: </B>%d<BR>\n", core->qual);
/* NOTE(review): this line opens a "(" in the output that is never closed by any
 * statement visible here; the code that completes it is presumably elided. */
printf("<B>CIGAR string: </B><tt>%s</tt> (", bamGetCigar(bam));
printf("<B>Flags: </B><tt>0x%02x:</tt><BR>\n &nbsp;&nbsp;", core->flag);
/* This note depends on bamIsRc() alone, not on isRc/useStrand. */
if (bamIsRc(bam))
    printf("<em>Note: although the read was mapped to the reverse strand of the genome, "
	   "the sequence and CIGAR in BAM are relative to the forward strand.</em><BR>\n");
struct dnaSeq *genoSeq = hChromSeq(database, seqName, tStart, tEnd);
/* Query sequence fetched with useStrand == FALSE. */
char *qSeq = bamGetQuerySequence(bam, FALSE);
if (isNotEmpty(qSeq) && !sameString(qSeq, "*"))
    /* This inner qSeq shadows the outer one; it is filled in by bamToFfAli() through
     * its retQSeq argument and is used only for the side-by-side display. */
    char *qSeq = NULL;
    struct ffAli *ffa = bamToFfAli(bam, genoSeq, tStart, useStrand, &qSeq);
    /* tStart+1: the start is printed one higher than the stored value -- presumably
     * converting a 0-based start to 1-based for display. */
    printf("<B>Alignment of %s to %s:%d-%d%s:</B><BR>\n", itemName,
	   seqName, tStart+1, tEnd, (isRc ? " (reverse complemented)" : ""));
    /* NOTE(review): the argument list of this call is cut off in the excerpt. */
    ffShowSideBySide(stdout, ffa, qSeq, 0, genoSeq->dna, tStart, tLength, 0, tLength, 8, isRc,
if (!skipQualityScore && core->l_qseq > 0)
    printf("<B>Sequence quality scores:</B><BR>\n<TT><TABLE><TR>\n");
    UBYTE *quals = bamGetQueryQuals(bam, useStrand);
    int i;
    for (i = 0;  i < core->l_qseq;  i++)
        /* NOTE(review): no statement for this condition is visible; presumably a
         * table row break every 24 cells was dropped from the excerpt -- confirm. */
        if (i > 0 && (i % 24) == 0)
        /* NOTE(review): qSeq here is the outer sequence fetched with FALSE, while
         * quals was fetched with useStrand; bamGetQueryQuals() reverses its output
         * when useStrand is set and the read is reverse-strand, so bases and scores
         * may be paired in opposite orders in that case -- confirm. */
        printf("<TD>%c<BR>%d</TD>", qSeq[i], quals[i]);
Exemplo n.º 3
struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset,
			 boolean useStrand, char **retQSeq)
/* Convert from bam to ffAli format.  If retQSeq is non-null, set it to the 
 * query sequence into which ffAli needle pointers point. (Adapted from psl.c's pslToFfAli.)
 * Walks the CIGAR operations of bam and builds one ffAli node per gapless aligned
 * block (ops M, = and X), with needle pointers into the query sequence and haystack
 * pointers into target->dna.  targetOffset is the coordinate of target->dna[0]: tStart
 * starts at targetOffset and haystack pointers are computed relative to it.
 * Returns the list after ffMakeRightLinks().
 * NOTE(review): this excerpt has no braces, and the switch below shows no allocation
 * of ff, no break statements and no default label; those lines are presumably missing
 * from the excerpt rather than from the real code -- confirm against the full source. */
struct ffAli *ffList = NULL, *ff;
const bam1_core_t *core = &bam->core;
boolean isRc = useStrand && bamIsRc(bam);
DNA *needle = (DNA *)bamGetQuerySequence(bam, useStrand);
if (retQSeq)
    *retQSeq = needle;
/* The caller's target sequence is handed to reverseComplement() here, so the caller's
 * dnaSeq is affected -- presumably modified in place. */
if (isRc)
    reverseComplement(target->dna, target->size);
DNA *haystack = target->dna;
unsigned int *cigarPacked = bam1_cigar(bam);
int tStart = targetOffset, qStart = 0, i;
// If isRc, need to go through the CIGAR ops backwards, but sequence offsets still count up.
int iStart = isRc ? (core->n_cigar - 1) : 0;
int iIncr = isRc ? -1 : 1;
for (i = iStart;  isRc ? (i >= 0) : (i < core->n_cigar);  i += iIncr)
    char op;
    /* size is the operation's length; op receives its one-letter code. */
    int size = bamUnpackCigarElement(cigarPacked[i], &op);
    switch (op)
	case 'M': // match or mismatch (gapless aligned block)
	case '=': // match
	case 'X': // mismatch
	    /* Push a node onto the front of ffList covering `size` bases of both query
	     * and target, then advance both offsets.
	     * NOTE(review): no allocation of ff is visible before this use. */
	    ff->left = ffList;
	    ffList = ff;
	    ff->nStart = needle + qStart;
	    ff->nEnd = ff->nStart + size;
	    ff->hStart = haystack + tStart - targetOffset;
	    ff->hEnd = ff->hStart + size;
	    tStart += size;
	    qStart += size;
	case 'I': // inserted in query
	case 'S': // skipped query bases at beginning or end ("soft clipping")
	    /* Consumes query bases only. */
	    qStart += size;
	case 'D': // deleted from query
	case 'N': // long deletion from query (intron as opposed to small del)
	    /* Consumes target bases only. */
	    tStart += size;
	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
	    /* NOTE(review): the comment above says H and P are ignored, yet the only
	     * statement visible after them is this errAbort; presumably a break and a
	     * default label sit between them in the full source -- confirm. */
	    errAbort("bamToFfAli: unrecognized CIGAR op %c -- update me", op);
/* Nodes were linked through ->left only while building; ffMakeRightLinks() is called
 * on the result -- presumably it fills in the ->right links and returns the list
 * head. */
ffList = ffMakeRightLinks(ffList);
return ffList;
Exemplo n.º 4
UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand)
/* Return the base quality scores stored in bam as a freshly needMem'd array of
 * core.l_qseq ubytes.  When useStrand is set and the read is reverse-strand
 * (bamIsRc), the scores are copied in reverse order.  If the first stored quality
 * byte is 255, every entry of the result is set to 255 -- presumably BAM's marker
 * for "no qualities stored". */
{
    const bam1_core_t *core = &bam->core;
    int count = core->l_qseq;
    UBYTE *scores = needMem(count);
    boolean reversed = useStrand && bamIsRc(bam);
    UBYTE *stored = bam1_qual(bam);
    int idx;

    for (idx = 0;  idx < count;  idx++)
        {
        if (stored[0] == 255)
            scores[idx] = 255;
        else if (reversed)
            scores[idx] = stored[count - 1 - idx];
        else
            scores[idx] = stored[idx];
        }
    return scores;
}
Exemplo n.º 5
void bamUnpackQuerySequence(const bam1_t *bam, boolean useStrand, char *qSeq)
/* Decode the packed nucleotide sequence of bam into qSeq as a NUL-terminated string.
 * qSeq must have room for core.l_qseq characters plus the terminator.  BAM stores
 * the query reverse-complemented when the alignment is on the - strand, so when
 * useStrand is set and the read is reverse-strand (bamIsRc) the decoded string is
 * reverse-complemented to recover the original query sequence. */
{
    uint8_t *packed = bam1_seq(bam);
    int baseCount = bam->core.l_qseq;
    int pos = 0;

    /* Translate each packed base code to its letter via bam_nt16_rev_table. */
    while (pos < baseCount)
        {
        qSeq[pos] = bam_nt16_rev_table[bam1_seqi(packed, pos)];
        pos++;
        }
    qSeq[pos] = '\0';

    if (!useStrand || !bamIsRc(bam))
        return;
    reverseComplement(qSeq, baseCount);
}