static void addBwCorrelations(struct bbiChromInfo *chrom, struct genomeRangeTree *targetGrt, struct bigWigValsOnChrom *aVals, struct bigWigValsOnChrom *bVals, struct bbiFile *aBbi, struct bbiFile *bBbi, double aThreshold, double bThreshold, struct correlate *c, struct correlate *cInEnriched, struct correlate *cClipped) /* Find bits of a and b that overlap and also overlap with targetRanges. Do correlations there */ { struct rbTree *targetRanges = genomeRangeTreeFindRangeTree(targetGrt, chrom->name); if (bigWigValsOnChromFetchData(aVals, chrom->name, aBbi) && bigWigValsOnChromFetchData(bVals, chrom->name, bBbi) ) { double *a = aVals->valBuf, *b = bVals->valBuf; int i, end = chrom->size; for (i=0; i<end; ++i) { double aVal = a[i], bVal = b[i]; correlateNext(c, aVal, bVal); if (aVal > aThreshold) aVal = aThreshold; if (bVal > bThreshold) bVal = bThreshold; correlateNext(cClipped, aVal, bVal); } if (targetRanges != NULL) { struct range *range, *rangeList = rangeTreeList(targetRanges); for (range = rangeList; range != NULL; range = range->next) { int start = range->start, end = range->end; for (i=start; i<end; ++i) correlateNext(cInEnriched, a[i], b[i]); } } } }
void doItBigWig(struct inInfo *in, struct bed *chiaList, struct bigWigValsOnChrom *chromVals, double *out1, double *out2) { struct bed *chromStart, *chromEnd, *chia; int chiaIx = 0; for (chromStart = chiaList; chromStart != NULL; chromStart = chromEnd) { chromEnd = bedListNextDifferentChrom(chromStart); if (bigWigValsOnChromFetchData(chromVals, chromStart->chrom, in->bbi)) { for (chia = chromStart; chia != chromEnd; chia = chia->next) { int blockStart = chia->chromStart; int blockSize = chia->blockSizes[0]; out1[chiaIx] = averageInRegion(chromVals, blockStart, blockSize); blockStart = chia->chromStart + chia->chromStarts[1]; blockSize = chia->blockSizes[1]; out2[chiaIx] = averageInRegion(chromVals, blockStart, blockSize); ++chiaIx; } } else { /* No data on this chrom, just output zero everywhere. */ for (chia = chromStart; chia != chromEnd; chia = chia->next) { out1[chiaIx] = out2[chiaIx] = 0; ++chiaIx; } } verboseDot(); } }
void averageFetchingEachChrom(struct bbiFile *bbi, struct bed **pBedList, int fieldCount, FILE *f, FILE *bedF) /* Do the averaging by sorting bedList by chromosome, and then processing each chromosome * at once. Faster for long bedLists. */ { /* Sort by chromosome. */ slSort(pBedList, bedCmpChrom); struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew(); struct bed *bed, *bedList, *nextChrom; verbose(1, "processing chromosomes"); for (bedList = *pBedList; bedList != NULL; bedList = nextChrom) { /* Figure out which chromosome we're working on, and the last bed using it. */ char *chrom = bedList->chrom; nextChrom = nextChromInList(bedList); verbose(2, "Processing %s\n", chrom); if (bigWigValsOnChromFetchData(chromVals, chrom, bbi)) { double *valBuf = chromVals->valBuf; Bits *covBuf = chromVals->covBuf; /* Loop through beds doing sums and outputting. */ for (bed = bedList; bed != nextChrom; bed = bed->next) { int size = 0, coverage = 0; double sum = 0.0; if (sampleAroundCenter > 0) { int center = (bed->chromStart + bed->chromEnd)/2; int left = center - (sampleAroundCenter/2); addBufIntervalInfo(valBuf, covBuf, left, left+sampleAroundCenter, &size, &coverage, &sum); } else { if (fieldCount < 12) { addBufIntervalInfo(valBuf, covBuf, bed->chromStart, bed->chromEnd, &size, &coverage, &sum); } else { int i; for (i=0; i<bed->blockCount; ++i) { int start = bed->chromStart + bed->chromStarts[i]; int end = start + bed->blockSizes[i]; addBufIntervalInfo(valBuf, covBuf, start, end, &size, &coverage, &sum); } } } /* Print out result, fudging mean to 0 if no coverage at all. */ double mean = 0; if (coverage > 0) mean = sum/coverage; fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, sum/size, mean); optionallyPrintBedPlus(bedF, bed, fieldCount, mean); } verboseDot(); } else { /* If no bigWig data on this chromosome, just output as if coverage is 0 */ for (bed = bedList; bed != nextChrom; bed = bed->next) { fprintf(f, "%s\t%d\t0\t0\t0\t0\n", bed->name, bedTotalBlockSize(bed)); optionallyPrintBedPlus(bedF, bed, fieldCount, 0); } } } verbose(1, "\n"); }
void doEnrichmentsFromBigWig(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf, struct cdwAssembly *assembly, struct target *targetList) /* Figure out enrichments from a bigBed file. */ { /* Get path to bigBed, open it, and read all chromosomes. */ char *bigWigPath = cdwPathForFileId(conn, ef->id); struct bbiFile *bbi = bigWigFileOpen(bigWigPath); struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi); struct bigWigValsOnChrom *valsOnChrom = bigWigValsOnChromNew(); /* This takes a while, so let's figure out what parts take the time. */ long totalBigQueryTime = 0; long totalOverlapTime = 0; /* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases * for all targets. This is complicated by just wanting to keep one chromosome worth of * bigWig data in memory. Also just for performance we do a lookup of target range tree to * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */ for (chrom = chromList; chrom != NULL; chrom = chrom->next) { long startBigQueryTime = clock1000(); boolean gotData = bigWigValsOnChromFetchData(valsOnChrom, chrom->name, bbi); long endBigQueryTime = clock1000(); totalBigQueryTime += endBigQueryTime - startBigQueryTime; if (gotData) { double *valBuf = valsOnChrom->valBuf; Bits *covBuf = valsOnChrom->covBuf; /* Loop through all targets adding overlaps from ivList */ long startOverlapTime = clock1000(); struct target *target; for (target = targetList; target != NULL; target = target->next) { if (target->skip) continue; struct genomeRangeTree *grt = target->grt; struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name); if (targetTree != NULL) { struct range *range, *rangeList = rangeTreeList(targetTree); for (range = rangeList; range != NULL; range = range->next) { int s = range->start, e = range->end, i; for (i=s; i<=e; ++i) { if (bitReadOne(covBuf, i)) { double x = valBuf[i]; target->uniqOverlapBases += 1; target->overlapBases += x; } } } } } long endOverlapTime = clock1000(); totalOverlapTime += endOverlapTime - startOverlapTime; } } verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime); /* Now loop through targets and save enrichment info to database */ struct target *target; for (target = targetList; target != NULL; target = target->next) { if (target->skip) continue; struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, target->overlapBases, target->uniqOverlapBases); cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128); cdwQaEnrichFree(&enrich); } bigWigValsOnChromFree(&valsOnChrom); bbiChromInfoFreeList(&chromList); bigWigFileClose(&bbi); freez(&bigWigPath); }