Exemplo n.º 1
0
static void addBwCorrelations(struct bbiChromInfo *chrom, struct genomeRangeTree *targetGrt,
    struct bigWigValsOnChrom *aVals, struct bigWigValsOnChrom *bVals,
    struct bbiFile *aBbi, struct bbiFile *bBbi, 
    double aThreshold, double bThreshold,
    struct correlate *c, struct correlate *cInEnriched, struct correlate *cClipped)
/* Find bits of a and b that overlap and also overlap with targetRanges.  Do correlations there */
{
struct rbTree *targetRanges = genomeRangeTreeFindRangeTree(targetGrt, chrom->name);
if (bigWigValsOnChromFetchData(aVals, chrom->name, aBbi) &&
    bigWigValsOnChromFetchData(bVals, chrom->name, bBbi) )
    {
    double *a = aVals->valBuf, *b = bVals->valBuf;
    int i, end = chrom->size;
    for (i=0; i<end; ++i)
	{
	double aVal = a[i], bVal = b[i];
	correlateNext(c, aVal, bVal);
	if (aVal > aThreshold) aVal = aThreshold;
	if (bVal > bThreshold) bVal = bThreshold;
	correlateNext(cClipped, aVal, bVal);
	}
    if (targetRanges != NULL)
	{
	struct range *range, *rangeList = rangeTreeList(targetRanges);
	for (range = rangeList; range != NULL; range = range->next)
	    {
	    int start = range->start, end = range->end;
	    for (i=start; i<end; ++i)
		correlateNext(cInEnriched, a[i], b[i]);
	    }
	}
    }
}
Exemplo n.º 2
0
void doItBigWig(struct inInfo *in, struct bed *chiaList, struct bigWigValsOnChrom *chromVals,
	double *out1, double *out2)
{
struct bed *chromStart, *chromEnd, *chia;
int chiaIx = 0;
for (chromStart = chiaList; chromStart != NULL; chromStart = chromEnd)
    {
    chromEnd = bedListNextDifferentChrom(chromStart);
    if (bigWigValsOnChromFetchData(chromVals, chromStart->chrom, in->bbi))
        {
	for (chia = chromStart; chia != chromEnd; chia = chia->next)
	    {
	    int blockStart = chia->chromStart;
	    int blockSize = chia->blockSizes[0];
	    out1[chiaIx] = averageInRegion(chromVals, blockStart, blockSize);

	    blockStart = chia->chromStart + chia->chromStarts[1];
	    blockSize = chia->blockSizes[1];
	    out2[chiaIx] = averageInRegion(chromVals, blockStart, blockSize);

	    ++chiaIx;
	    }
	}
    else
	{
	/* No data on this chrom, just output zero everywhere. */
	for (chia = chromStart; chia != chromEnd; chia = chia->next)
	    {
	    out1[chiaIx] = out2[chiaIx] = 0;
	    ++chiaIx;
	    }
	}
    verboseDot();
    }
}
Exemplo n.º 3
0
void averageFetchingEachChrom(struct bbiFile *bbi, struct bed **pBedList, int fieldCount, 
	FILE *f, FILE *bedF)
/* Do the averaging by sorting bedList by chromosome, and then processing each chromosome
 * at once. Faster for long bedLists. */
{
/* Sort by chromosome. */
slSort(pBedList, bedCmpChrom);

struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew();

struct bed *bed, *bedList, *nextChrom;
verbose(1, "processing chromosomes");
for (bedList = *pBedList; bedList != NULL; bedList = nextChrom)
    {
    /* Figure out which chromosome we're working on, and the last bed using it. */
    char *chrom = bedList->chrom;
    nextChrom = nextChromInList(bedList);
    verbose(2, "Processing %s\n", chrom);

    if (bigWigValsOnChromFetchData(chromVals, chrom, bbi))
	{
	double *valBuf = chromVals->valBuf;
	Bits *covBuf = chromVals->covBuf;

	/* Loop through beds doing sums and outputting. */
	for (bed = bedList; bed != nextChrom; bed = bed->next)
	    {
	    int size = 0, coverage = 0;
	    double sum = 0.0;
	    if (sampleAroundCenter > 0)
		{
		int center = (bed->chromStart + bed->chromEnd)/2;
		int left = center - (sampleAroundCenter/2);
		addBufIntervalInfo(valBuf, covBuf, left, left+sampleAroundCenter,
		    &size, &coverage, &sum);
		}
	    else
		{
		if (fieldCount < 12)
		    {
		    addBufIntervalInfo(valBuf, covBuf, bed->chromStart, bed->chromEnd,
			&size, &coverage, &sum);
		    }
		else
		    {
		    int i;
		    for (i=0; i<bed->blockCount; ++i)
			{
			int start = bed->chromStart + bed->chromStarts[i];
			int end = start + bed->blockSizes[i];
			addBufIntervalInfo(valBuf, covBuf, start, end, &size, &coverage, &sum);
			}
		    }
		}

	    /* Print out result, fudging mean to 0 if no coverage at all. */
	    double mean = 0;
	    if (coverage > 0)
		 mean = sum/coverage;
	    fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, sum/size, mean);
	    optionallyPrintBedPlus(bedF, bed, fieldCount, mean);
	    }
	verboseDot();
	}
    else
        {
	/* If no bigWig data on this chromosome, just output as if coverage is 0 */
	for (bed = bedList; bed != nextChrom; bed = bed->next)
	    {
	    fprintf(f, "%s\t%d\t0\t0\t0\t0\n", bed->name, bedTotalBlockSize(bed));
	    optionallyPrintBedPlus(bedF, bed, fieldCount, 0);
	    }
	}
    }
verbose(1, "\n");
}
Exemplo n.º 4
0
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigBed file. */
{
/* Get path to bigBed, open it, and read all chromosomes. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
struct bigWigValsOnChrom *valsOnChrom = bigWigValsOnChromNew();

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigWig data in memory. Also just for performance we do a lookup of target range tree to
 * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    long startBigQueryTime = clock1000();
    boolean gotData = bigWigValsOnChromFetchData(valsOnChrom, chrom->name, bbi);
    long endBigQueryTime = clock1000();
    totalBigQueryTime += endBigQueryTime - startBigQueryTime;
    if (gotData)
	{
	double *valBuf = valsOnChrom->valBuf;
	Bits *covBuf = valsOnChrom->covBuf;

	/* Loop through all targets adding overlaps from ivList */
	long startOverlapTime = clock1000();
	struct target *target;
	for (target = targetList; target != NULL; target = target->next)
	    {
	    if (target->skip)
		continue;
	    struct genomeRangeTree *grt = target->grt;
	    struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	    if (targetTree != NULL)
		{
		struct range *range, *rangeList = rangeTreeList(targetTree);
		for (range = rangeList; range != NULL; range = range->next)
		    {
		    int s = range->start, e = range->end, i;
		    for (i=s; i<=e; ++i)
		        {
			if (bitReadOne(covBuf, i))
			    {
			    double x = valBuf[i];
			    target->uniqOverlapBases += 1;
			    target->overlapBases += x;
			    }
			}
		    }
		}
	    }
	long endOverlapTime = clock1000();
	totalOverlapTime += endOverlapTime - startOverlapTime;
	}
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

bigWigValsOnChromFree(&valsOnChrom);
bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}