Esempio n. 1
0
static int countBasesOverlap(struct bed *bedItem, Bits *bits, boolean hasBlocks,
			     int chromSize)
/* Return the number of bases belonging to bedItem covered by bits.
 *   bedItem   - item whose chromStart/chromEnd (and blocks, if hasBlocks)
 *               select the positions to examine.
 *   bits      - bitmap read at chromosome positions.
 *   hasBlocks - when TRUE, only positions inside the item's blocks are counted.
 *   chromSize - upper bound used to validate the item's coordinates.
 * A zero-length item is scored against the base on either side of it.
 * Aborts via errAbort if the item does not lie within [0,chromSize). */
{
int count = 0;
int i;

/* Check both ends of the range named in the message: a negative chromStart
 * is just as far outside [0,chromSize) as chromEnd > chromSize, and would
 * otherwise be handed to bitCountRange as a negative start position. */
if (bedItem->chromStart < 0 || bedItem->chromEnd > chromSize)
    errAbort("Item out of range [0,%d): %s %s:%d-%d",
	     chromSize, (bedItem->name ? bedItem->name : ""),
	     bedItem->chrom, bedItem->chromStart, bedItem->chromEnd);

if (bedItem->chromStart == bedItem->chromEnd)
    {
    /* Zero-size item: count overlap with adjacent bases.  The neighbours may
     * fall off either end of the chromosome, hence the per-position guard. */
    for (i = bedItem->chromStart-1;  i < bedItem->chromEnd+1;  i++)
	if (i >= 0 && i < chromSize && bitReadOne(bits, i))
	    count++;
    }
else if (hasBlocks)
    {
    /* Block starts are stored relative to chromStart. */
    for (i=0;  i < bedItem->blockCount;  i++)
	{
	int start = bedItem->chromStart + bedItem->chromStarts[i];
	count += bitCountRange(bits, start, bedItem->blockSizes[i]);
	}
    }
else
    {
    count = bitCountRange(bits, bedItem->chromStart, bedItem->chromEnd - bedItem->chromStart);
    }
return(count);
}
Esempio n. 2
0
struct hash *chainReadUsedSwapLf(char *fileName, boolean swapQ, Bits *bits, struct lineFile *lf)
/* Read chains from lf that are marked as used in the bits array (which may
 * be NULL, in which case every chain is kept) into a hash keyed by id.
 *   fileName - not referenced here; lf->fileName is what error messages use.
 *   swapQ    - when TRUE each kept chain is passed to chainSwap before it is
 *              stored.
 *   bits     - read at index chain->id to decide whether a chain is kept.
 *   lf       - open line file the chains are read from.
 * The hash key is the chain id formatted with "%x".  Chains that are not
 * marked in bits are freed.  Aborts via errAbort on a duplicate id. */
{
char nameBuf[16];
struct hash *hash = hashNew(18);
struct chain *chain;

while ((chain = chainRead(lf)) != NULL)
    {
    /* Discard chains the caller did not mark as used. */
    if (bits != NULL && !bitReadOne(bits, chain->id))
	{
	chainFree(&chain);
        continue;
	}
    safef(nameBuf, sizeof(nameBuf), "%x", chain->id);
    if (hashLookup(hash, nameBuf))
        errAbort("Duplicate chain %d ending line %d of %s", 
		chain->id, lf->lineIx, lf->fileName);
    if (swapQ)
        chainSwap(chain);
    hashAdd(hash, nameBuf, chain);
    }
return hash;
}
int findCrossover(Bits *bits, int overlapStart, int overlapEnd)
/* Probe outward from the midpoint of the overlap, trying the right side and
 * then the left side at each step, for the first position whose bit is clear.
 * bits is read at offsets relative to overlapStart.  Returns the position
 * (offset plus overlapStart) of that clear spot, or -1 if there is none. */
{
int span = overlapEnd - overlapStart;
int mid = span/2;
int lastStep = mid + 1;
int step;

for (step = 0; step <= lastStep; ++step)
    {
    int rightOff = mid + step;
    int leftOff = mid - step;

    if (rightOff < span && !bitReadOne(bits, rightOff))
        return overlapStart + rightOff;
    if (leftOff >= 0 && !bitReadOne(bits, leftOff))
        return overlapStart + leftOff;
    }
return -1;
}
static int bitCountBasesOverlap(struct bed *bedItem, Bits *bits, boolean hasBlocks)
/* Tally how many bases of bedItem have their bit set in bits.  With hasBlocks
 * only positions inside the item's blocks are examined; otherwise every
 * position from chromStart up to (not including) chromEnd is. */
{
int covered = 0;
int blockIx, pos;

if (!hasBlocks)
    {
    /* Unblocked item: walk the whole span one base at a time. */
    for (pos = bedItem->chromStart;  pos < bedItem->chromEnd;  pos++)
	{
	if (bitReadOne(bits, pos))
	    ++covered;
	}
    return covered;
    }

/* Blocked item: block starts are stored relative to chromStart. */
for (blockIx = 0;  blockIx < bedItem->blockCount;  blockIx++)
    {
    int blockStart = bedItem->chromStart + bedItem->chromStarts[blockIx];
    int blockEnd = blockStart + bedItem->blockSizes[blockIx];
    for (pos = blockStart;  pos < blockEnd;  pos++)
	{
	if (bitReadOne(bits, pos))
	    ++covered;
	}
    }
return covered;
}
Esempio n. 5
0
static void savePslx(char *chromName, int chromSize, int chromOffset,
                     struct ffAli *ali, struct dnaSeq *tSeq, struct dnaSeq *qSeq,
                     boolean isRc, enum ffStringency stringency, int minMatch, FILE *f,
                     struct hash *t3Hash, boolean reportTargetStrand, boolean targetIsRc,
                     struct hash *maskHash, int minIdentity,
                     boolean qIsProt, boolean tIsProt, boolean saveSeq)
/* Analyse one alignment and, if its identity score reaches minIdentity, write
 * it to f as a single psl line (pslX, with the sequence of every block
 * appended, if saveSeq is TRUE).
 *   chromName, chromSize - target name and size written to the output line;
 *                          chromSize is also used to flip the target
 *                          coordinates when targetIsRc is set.
 *   chromOffset          - added to every target position obtained from
 *                          trans3GenoPos.
 *   ali                  - first block of the alignment; the blocks are
 *                          walked through their ->right links.
 *   tSeq, qSeq           - target and query sequences; block pointers are
 *                          turned into offsets relative to their ->dna.
 *   isRc                 - query strand is written as '-' and the query
 *                          start/end are flipped against qSeq->size.
 *   stringency           - with ffCdna, target-side gaps are left out of
 *                          the gap penalty in the identity score.
 *   t3Hash, maskHash     - optional hashes (may be NULL), looked up by
 *                          tSeq->name with hashMustFindVal.
 *   reportTargetStrand   - write a second strand character for the target.
 *   minIdentity          - threshold compared against the score computed
 *                          with roundingScale(1000, ...).
 *   minMatch, qIsProt, tIsProt - not referenced in this function body.
 * Aborts via errAbort if f reports a write error. */
{
    /* This function was taken from psLayout and slightly extended to cope
     * with protein as well as DNA alignments. */
    struct ffAli *ff, *nextFf;
    struct ffAli *right = ffRightmost(ali);
    DNA *needle = qSeq->dna;
    DNA *hay = tSeq->dna;
    /* Query extent of the whole alignment, as offsets from the query start. */
    int nStart = ali->nStart - needle;
    int nEnd = right->nEnd - needle;
    int hStart, hEnd;
    int nInsertBaseCount = 0;
    int nInsertCount = 0;
    int hInsertBaseCount = 0;
    int hInsertCount = 0;
    int matchCount = 0;
    int mismatchCount = 0;
    int repMatch = 0;
    int countNs = 0;
    DNA *np, *hp, n, h;
    int blockSize;
    int i;
    struct trans3 *t3List = NULL;
    Bits *maskBits = NULL;

    /* Both lookups are optional and keyed by the target sequence name. */
    if (maskHash != NULL)
        maskBits = hashMustFindVal(maskHash, tSeq->name);
    if (t3Hash != NULL)
        t3List = hashMustFindVal(t3Hash, tSeq->name);
    /* Target extent of the whole alignment.  NOTE(review): trans3GenoPos
     * presumably converts a pointer into the target to a position, taking
     * translated frames in t3List into account - confirm against its
     * definition. */
    hStart = trans3GenoPos(ali->hStart, tSeq, t3List, FALSE) + chromOffset;
    hEnd = trans3GenoPos(right->hEnd, tSeq, t3List, TRUE) + chromOffset;

    /* Count up matches, mismatches, inserts, etc. */
    for (ff = ali; ff != NULL; ff = nextFf)
    {
        nextFf = ff->right;
        blockSize = ff->nEnd - ff->nStart;
        np = ff->nStart;
        hp = ff->hStart;
        for (i=0; i<blockSize; ++i)
        {
            n = np[i];
            h = hp[i];
            /* A lower-case 'n' on either side is tallied separately and is
             * neither a match nor a mismatch. */
            if (n == 'n' || h == 'n')
                ++countNs;
            else
            {
                if (n == h)
                {
                    if (maskBits != NULL)
                    {
                        /* Matches whose target offset (relative to hay) has
                         * its mask bit set are counted as repMatch. */
                        int seqOff = hp + i - hay;
                        if (bitReadOne(maskBits, seqOff))
                            ++repMatch;
                        else
                            ++matchCount;
                    }
                    else
                        ++matchCount;
                }
                else
                    ++mismatchCount;
            }
        }
        /* Account for the gap between this block and the next one, on the
         * query (n) side and on the target (h) side independently. */
        if (nextFf != NULL)
        {
            int nhStart = trans3GenoPos(nextFf->hStart, tSeq, t3List, FALSE) + chromOffset;
            int ohEnd = trans3GenoPos(ff->hEnd, tSeq, t3List, TRUE) + chromOffset;
            int hGap = nhStart - ohEnd;
            int nGap = nextFf->nStart - ff->nEnd;

            if (nGap != 0)
            {
                ++nInsertCount;
                nInsertBaseCount += nGap;
            }
            if (hGap != 0)
            {
                ++hInsertCount;
                hInsertBaseCount += hGap;
            }
        }
    }


    /* See if it looks good enough to output, and output. */
    /* if (score >= minMatch) Moved to higher level */
    {
        /* Each counted gap costs two matches; target-side gaps are not
         * counted when stringency is ffCdna.  NOTE(review): roundingScale
         * presumably computes 1000*p/q with rounding, and q is zero when
         * no base was counted as match or mismatch - confirm how it
         * behaves in that case. */
        int gaps = nInsertCount + (stringency == ffCdna ? 0: hInsertCount);
        int id = roundingScale(1000, matchCount + repMatch - 2*gaps, matchCount + repMatch + mismatchCount);
        if (id >= minIdentity)
        {
            /* Flip the overall query coordinates against the query size. */
            if (isRc)
            {
                int temp;
                int oSize = qSeq->size;
                temp = nStart;
                nStart = oSize - nEnd;
                nEnd = oSize - temp;
            }
            /* Flip the overall target coordinates against chromSize. */
            if (targetIsRc)
            {
                int temp;
                temp = hStart;
                hStart = chromSize - hEnd;
                hEnd = chromSize - temp;
            }
            /* The eight counters, then the query strand. */
            fprintf(f, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c",
                    matchCount, mismatchCount, repMatch, countNs, nInsertCount, nInsertBaseCount, hInsertCount, hInsertBaseCount,
                    (isRc ? '-' : '+'));
            /* Optional target strand, written directly after the query
             * strand with no separator. */
            if (reportTargetStrand)
                fprintf(f, "%c", (targetIsRc ? '-' : '+') );
            /* Query name/size/start/end, target name/size/start/end, and
             * the number of blocks. */
            fprintf(f, "\t%s\t%d\t%d\t%d\t"
                    "%s\t%d\t%d\t%d\t%d\t",
                    qSeq->name, qSeq->size, nStart, nEnd,
                    chromName, chromSize, hStart, hEnd,
                    ffAliCount(ali));
            /* Comma-terminated list of block sizes. */
            for (ff = ali; ff != NULL; ff = ff->right)
                fprintf(f, "%ld,", (long)(ff->nEnd - ff->nStart));
            fprintf(f, "\t");
            /* Comma-terminated list of block starts in the query, as
             * offsets from needle. */
            for (ff = ali; ff != NULL; ff = ff->right)
                fprintf(f, "%ld,", (long)(ff->nStart - needle));
            fprintf(f, "\t");
            /* Comma-terminated list of block starts in the target. */
            for (ff = ali; ff != NULL; ff = ff->right)
                fprintf(f, "%d,", trans3GenoPos(ff->hStart, tSeq, t3List, FALSE) + chromOffset);
            /* pslX extension: the raw query bytes of every block, then the
             * raw target bytes of every block, each list comma-terminated. */
            if (saveSeq)
            {
                fputc('\t', f);
                for (ff = ali; ff != NULL; ff = ff->right)
                {
                    mustWrite(f, ff->nStart, ff->nEnd - ff->nStart);
                    fputc(',', f);
                }
                fputc('\t', f);
                for (ff = ali; ff != NULL; ff = ff->right)
                {
                    mustWrite(f, ff->hStart, ff->hEnd - ff->hStart);
                    fputc(',', f);
                }
            }
            fprintf(f, "\n");
            /* A stream error raised by any of the writes above is reported
             * with perror and then aborts. */
            if (ferror(f))
            {
                perror("");
                errAbort("Write error to .psl");
            }
        }
    }
}
Esempio n. 6
0
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigWig file: accumulate overlap totals for
 * every non-skipped target in targetList, then save one cdwQaEnrich row per
 * target to the database. */
{
/* Get path to the bigWig, open it, and read all chromosomes. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew();
struct bbiChromInfo *chrom;
struct target *target;

/* Millisecond totals for the two phases, reported once at the end. */
long msInBigQuery = 0;
long msInOverlap = 0;

/* Fill in target->overlapBases and ->uniqOverlapBases for all targets.  Data
 * is fetched one chromosome at a time into chromVals, and each target's
 * range tree for that chromosome is looked up once, outside the per-base
 * loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    long fetchStart = clock1000();
    boolean gotData = bigWigValsOnChromFetchData(chromVals, chrom->name, bbi);
    msInBigQuery += clock1000() - fetchStart;
    if (!gotData)
	continue;

    double *vals = chromVals->valBuf;
    Bits *covered = chromVals->covBuf;

    /* Add this chromosome's contribution to every target. */
    long overlapStart = clock1000();
    for (target = targetList; target != NULL; target = target->next)
	{
	if (target->skip)
	    continue;
	struct rbTree *chromTree = genomeRangeTreeFindRangeTree(target->grt, chrom->name);
	if (chromTree == NULL)
	    continue;

	struct range *range;
	for (range = rangeTreeList(chromTree); range != NULL; range = range->next)
	    {
	    int last = range->end;
	    int pos;
	    /* NOTE(review): the bound is inclusive of range->end.  If ranges
	     * are half-open this reads one position past each range - confirm
	     * against the definition of struct range. */
	    for (pos = range->start; pos <= last; ++pos)
		{
		if (!bitReadOne(covered, pos))
		    continue;
		/* One covered base, weighted by its bigWig value. */
		target->uniqOverlapBases += 1;
		target->overlapBases += vals[pos];
		}
	    }
	}
    msInOverlap += clock1000() - overlapStart;
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*msInBigQuery, 0.001*msInOverlap);

/* Now loop through targets and save enrichment info to database */
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

/* Release everything acquired at the top. */
bigWigValsOnChromFree(&chromVals);
bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}