예제 #1
0
파일: bamFile.c 프로젝트: avilella/methylQA
int bamGetTargetLength(const bam1_t *bam)
/* Return how many reference (target) bases the alignment spans, computed by
 * walking bam's packed CIGAR array.  Aborts via errAbort on a CIGAR op that
 * is not handled below. */
{
    const bam1_core_t *core = &bam->core;
    unsigned int *packedOps = bam1_cigar(bam);
    int refSpan = 0;
    int ix = 0;
    while (ix < core->n_cigar)
    {
        char opCode;
        int opLen = bamUnpackCigarElement(packedOps[ix], &opCode);
        switch (opCode)
        {
        // Ops that advance along the reference.
        case 'M': // match or mismatch (gapless aligned block)
        case '=': // match
        case 'X': // mismatch
        case 'D': // deleted from query
        case 'N': // long deletion from query (intron as opposed to small del)
            refSpan += opLen;
            break;
        // Ops that do not contribute to the reference span.
        case 'I': // inserted in query
        case 'S': // soft clipping
        case 'H': // hard clipping
        case 'P': // silent deletion from padded reference sequence
            break;
        default:
            errAbort("bamGetTargetLength: unrecognized CIGAR op %c -- update me", opCode);
        }
        ix++;
    }
    return refSpan;
}
예제 #2
0
파일: hgBam.c 프로젝트: davidhoover/kent
struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset,
			 boolean useStrand, char **retQSeq)
/* Build an ffAli list from a bam alignment: one ffAli per gapless CIGAR block
 * (M, = or X).  If retQSeq is non-NULL it receives the query sequence that the
 * needle pointers of the returned list point into.  When useStrand is set and
 * bamIsRc(bam) is true, target->dna is reverse-complemented in place before
 * the haystack pointers are computed.  (Adapted from psl.c's pslToFfAli.) */
{
const bam1_core_t *core = &bam->core;
struct ffAli *aliList = NULL;
boolean reversed = useStrand && bamIsRc(bam);
DNA *query = (DNA *)bamGetQuerySequence(bam, useStrand);
if (retQSeq != NULL)
    *retQSeq = query;
if (reversed)
    reverseComplement(target->dna, target->size);
DNA *targetDna = target->dna;
unsigned int *packedOps = bam1_cigar(bam);
int opCount = core->n_cigar;
int tPos = targetOffset;
int qPos = 0;
int step;
// On the reverse strand the CIGAR ops are visited last-to-first, while the
// sequence offsets still count up.
for (step = 0;  step < opCount;  step++)
    {
    int ix = reversed ? (opCount - 1 - step) : step;
    char opCode;
    int opLen = bamUnpackCigarElement(packedOps[ix], &opCode);
    switch (opCode)
	{
	case 'M': // match or mismatch (gapless aligned block)
	case '=': // match
	case 'X': // mismatch
	    {
	    struct ffAli *block;
	    AllocVar(block);
	    // Push onto the head; right links are filled in after the loop.
	    block->left = aliList;
	    aliList = block;
	    block->nStart = query + qPos;
	    block->nEnd = block->nStart + opLen;
	    block->hStart = targetDna + tPos - targetOffset;
	    block->hEnd = block->hStart + opLen;
	    qPos += opLen;
	    tPos += opLen;
	    break;
	    }
	case 'I': // inserted in query
	case 'S': // skipped query bases at beginning or end ("soft clipping")
	    qPos += opLen;
	    break;
	case 'D': // deleted from query
	case 'N': // long deletion from query (intron as opposed to small del)
	    tPos += opLen;
	    break;
	case 'H': // hard clipping: bases not stored in the record's query sequence
	case 'P': // silent deletion from padded reference sequence -- ignored
	    break;
	default:
	    errAbort("bamToFfAli: unrecognized CIGAR op %c -- update me", opCode);
	}
    }
aliList = ffMakeRightLinks(aliList);
ffCountGoodEnds(aliList);
return aliList;
}
예제 #3
0
파일: bamFile.c 프로젝트: avilella/methylQA
void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
/* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
 * the beginning of the query sequence and quality; likewise for retHigh at end.
 * For convenience, retClippedQLen is the original query length minus soft clipping
 * (and the length of the query sequence that will be returned).
 * Only the first and last CIGAR elements are examined.  A record with no CIGAR
 * elements reports zero clipping at both ends, and a record whose single element
 * is 'S' reports it as low clipping only, so it is never counted twice. */
{
    const bam1_core_t *core = &bam->core;
    int low = 0;
    int high = 0;
    // Guard against an empty CIGAR: indexing element 0 or n_cigar-1 would
    // read outside the packed array.
    if (core->n_cigar > 0)
    {
        unsigned int *cigarPacked = bam1_cigar(bam);
        char op;
        int n = bamUnpackCigarElement(cigarPacked[0], &op);
        if (op == 'S')
            low = n;
        // Only look at the last element when it is distinct from the first.
        if (core->n_cigar > 1)
        {
            n = bamUnpackCigarElement(cigarPacked[core->n_cigar-1], &op);
            if (op == 'S')
                high = n;
        }
    }
    if (retLow != NULL)
        *retLow = low;
    if (retHigh != NULL)
        *retHigh = high;
    if (retClippedQLen != NULL)
        *retClippedQLen = (core->l_qseq - low - high);
}
예제 #4
0
파일: bamFile.c 프로젝트: avilella/methylQA
void bamUnpackCigar(const bam1_t *bam, struct dyString *dyCigar)
/* Append the textual form of bam's CIGAR to dyCigar: for each packed element,
 * its decimal length followed by its op character. */
{
    const bam1_core_t *core = &bam->core;
    unsigned int *cur = bam1_cigar(bam);
    unsigned int *end = cur + core->n_cigar;
    for ( ;  cur < end;  cur++)
    {
        char opCode;
        int opLen = bamUnpackCigarElement(*cur, &opCode);
        dyStringPrintf(dyCigar, "%d", opLen);
        dyStringAppendC(dyCigar, opCode);
    }
}
예제 #5
0
파일: bamTrack.c 프로젝트: maximilianh/kent
static int countBam(const bam1_t *bam, void *data)
/* bam_fetch() calls this on each bam alignment retrieved.  data is cast to a
 * struct bamWigTrackData; for every gapless aligned CIGAR block (M, = or X)
 * the count of each covered preDraw bin is incremented.  Bin positions are the
 * block's reference coordinates relative to winStart, multiplied by btd->scale.
 * Always returns 0. */
{
struct bamWigTrackData *btd = (struct bamWigTrackData *)data;
const bam1_core_t *core = &bam->core;

int tPos = core->pos;
unsigned int *cigar = bam1_cigar(bam);
int i;
double scale = btd->scale;
for (i = 0;  i < core->n_cigar;  i++)
    {
    char op;
    int n = bamUnpackCigarElement(cigar[i], &op);
    switch (op)
	{
	case 'X': // mismatch (gapless aligned block)
	case '=': // match (gapless aligned block)
	case 'M': // match or mismatch (gapless aligned block)
	    {
	    int start = (int)(scale * (tPos - winStart));
	    int end = (int)(scale * ((tPos + n) - winStart));
	    // Use a dedicated bin index: reusing the CIGAR index i here would
	    // derail the outer loop after the first aligned block.
	    // NOTE(review): no bounds check on preDraw here -- confirm callers
	    // only fetch alignments whose bins fall inside the array.
	    int bin;
	    for (bin = start;  bin < end;  bin++)
		btd->preDraw[bin + btd->preDrawZero].count++;
	    tPos += n;
	    break;
	    }
	case 'D': // deleted from query
	case 'N': // long deletion from query (intron as opposed to small del)
	    tPos += n;
	    break;
	case 'I': // inserted in query -- does not advance the reference position
	case 'S': // skipped query bases at beginning or end ("soft clipping")
	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
	    break;
	default:
	    errAbort("countBam: unrecognized CIGAR op %c -- update me", op);
	}

    }
return 0;
}
예제 #6
0
struct simpleFeature *sfFromNumericCigar(const bam1_t *bam, int *retLength)
/* Walk BAM's numeric CIGAR and return a list with one simpleFeature per
 * gapless aligned block (M, = or X), in CIGAR order.  If retLength is
 * non-NULL it receives the total length covered on the reference sequence
 * (aligned blocks plus D/N gaps). */
{
const bam1_core_t *core = &bam->core;
unsigned int *packedOps = bam1_cigar(bam);
struct simpleFeature *featList = NULL;
int refPos = core->pos;
int queryPos = 0;
int refSpan = 0;
int ix;
for (ix = 0;  ix < core->n_cigar;  ix++)
    {
    char opCode;
    int opLen = bamUnpackCigarElement(packedOps[ix], &opCode);
    switch (opCode)
	{
	case 'M': // match or mismatch (gapless aligned block)
	case '=': // match (gapless aligned block)
	case 'X': // mismatch (gapless aligned block)
	    {
	    struct simpleFeature *feat;
	    AllocVar(feat);
	    feat->start = refPos;
	    feat->qStart = queryPos;
	    feat->end = refPos + opLen;
	    refPos = feat->end;
	    feat->qEnd = queryPos + opLen;
	    queryPos = feat->qEnd;
	    slAddHead(&featList, feat);
	    refSpan += opLen;
	    break;
	    }
	case 'I': // inserted in query
	    queryPos += opLen;
	    break;
	case 'D': // deleted from query
	case 'N': // long deletion from query (intron as opposed to small del)
	    refPos += opLen;
	    refSpan += opLen;
	    break;
	case 'S': // soft clipping
	case 'H': // hard clipping
	case 'P': // silent deletion from padded reference sequence -- ignored
	    break;
	default:
	    errAbort("sfFromNumericCigar: unrecognized CIGAR op %c -- update me", opCode);
	}
    }
if (retLength != NULL)
    *retLength = refSpan;
// Features were pushed onto the head, so flip the list back into CIGAR order.
slReverse(&featList);
return featList;
}
예제 #7
0
파일: bamFile.c 프로젝트: avilella/methylQA
void bamShowCigarEnglish(const bam1_t *bam)
/* Write bam's CIGAR to stdout as a comma-separated English description,
 * e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match". */
{
    const bam1_core_t *core = &bam->core;
    unsigned int *first = bam1_cigar(bam);
    unsigned int *end = first + core->n_cigar;
    unsigned int *cur;
    for (cur = first;  cur < end;  cur++)
    {
        char opCode;
        int opLen = bamUnpackCigarElement(*cur, &opCode);
        // Separator goes before every element except the first.
        if (cur != first)
            printf(", ");
        switch (opCode)
        {
        case 'M': // match or mismatch (gapless aligned block)
            printf("%d (mis)Match", opLen);
            break;
        case '=': // match
            printf("%d Match", opLen);
            break;
        case 'X': // mismatch
            printf("%d Mismatch", opLen);
            break;
        case 'I': // inserted in query
            printf("%d Insertion", opLen);
            break;
        case 'S': // soft clipping
            printf("%d Skipped", opLen);
            break;
        case 'D': // deleted from query
            printf("%d Deletion", opLen);
            break;
        case 'N': // long deletion from query (intron as opposed to small del)
            printf("%d deletioN", opLen);
            break;
        case 'H': // hard clipping
            printf("%d Hard clipped query", opLen);
            break;
        case 'P': // silent deletion from padded reference sequence
            printf("%d Padded / silent deletion", opLen);
            break;
        default:
            errAbort("bamShowCigarEnglish: unrecognized CIGAR op %c -- update me", opCode);
        }
    }
}
예제 #8
0
static struct psl *pslFromBam(const bam1_t *bam)
/* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords,
 * no scoring info).  Returns NULL when the CIGAR contains no aligned block
 * (M, = or X); nothing is allocated in that case.  Otherwise returns a newly
 * allocated psl whose block arrays hold one entry per aligned block.
 * Aborts via errAbort on a CIGAR op that is not handled below. */
{
const bam1_core_t *core = &bam->core;
unsigned int *cigar = bam1_cigar(bam);
int i;

// First pass: validate the ops and count aligned blocks before allocating
// anything, so the "no alignment" case can return without leaking memory.
unsigned blockCount = 0;
for (i = 0;  i < core->n_cigar;  i++)
    {
    char op;
    bamUnpackCigarElement(cigar[i], &op);
    switch (op)
	{
	case 'X': // mismatch (gapless aligned block)
	case '=': // match (gapless aligned block)
	case 'M': // match or mismatch (gapless aligned block)
	    blockCount++;
	    break;
	case 'I':
	case 'D':
	case 'N':
	case 'S':
	case 'H':
	case 'P':
	    break;
	default:
	    errAbort("pslFromBam: unrecognized CIGAR op %c -- update me", op);
	}
    }

if (blockCount == 0)
    {
    // sometimes BAM's have alignments with no alignment
    return NULL;
    }

struct psl *psl;
AllocVar(psl);
boolean isRc = (core->flag & BAM_FREVERSE);
psl->strand[0] = isRc ? '-' : '+';
psl->qName = cloneString(bam1_qname(bam));
psl->tName = cloneString(chromName);
unsigned *blockSizes, *qStarts, *tStarts;
AllocArray(blockSizes, blockCount);
AllocArray(qStarts, blockCount);
AllocArray(tStarts, blockCount);

// Second pass: fill in block coordinates and tally the query length.
int tPos = core->pos, qPos = 0, qLength = 0;
unsigned blockIx = 0;
for (i = 0;  i < core->n_cigar;  i++)
    {
    char op;
    int n = bamUnpackCigarElement(cigar[i], &op);
    switch (op)
	{
	case 'X': // mismatch (gapless aligned block)
	case '=': // match (gapless aligned block)
	case 'M': // match or mismatch (gapless aligned block)
	    blockSizes[blockIx] = n;
	    qStarts[blockIx] = qPos;
	    tStarts[blockIx] = tPos;
	    blockIx++;
	    tPos += n;
	    qPos += n;
	    qLength += n;
	    break;
	case 'I': // inserted in query
	    qPos += n;
	    qLength += n;
	    break;
	case 'D': // deleted from query
	case 'N': // long deletion from query (intron as opposed to small del)
	    tPos += n;
	    break;
	case 'S': // skipped query bases at beginning or end ("soft clipping")
	    qPos += n;
	    qLength += n;
	    break;
	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
	    break;
	default:
	    // Unrecognized ops were already reported by the first pass.
	    break;
	}
    }

psl->tSize = hChromSize(database, chromName);
psl->tStart = tStarts[0];
psl->tEnd = tStarts[blockCount-1] + blockSizes[blockCount-1];
psl->qSize = qLength;
psl->qStart = qStarts[0];
psl->qEnd = qStarts[blockCount-1] + blockSizes[blockCount-1];
if (isRc)
    reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
psl->blockCount = blockCount;
psl->blockSizes = blockSizes;
psl->qStarts = qStarts;
psl->tStarts = tStarts;
return psl;
}