boolean bigBedSummaryArrayExtended(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end,
        int summarySize, struct bbiSummaryElement *summary)
/* Fill the summary array with extended summary information for summarySize
 * evenly spaced elements covering chrom:start-end of a bigBed file.
 * This simply forwards to bbiSummaryArrayExtended, passing
 * bigBedCoverageIntervals as the interval fetcher, and returns its result. */
{
boolean gotSummary = bbiSummaryArrayExtended(bbi, chrom, start, end,
        bigBedCoverageIntervals, summarySize, summary);
return gotSummary;
}
boolean bigWigSummaryArrayExtended(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
        int summarySize, struct bbiSummaryElement *summary)
/* Fill the summary array with extended summary information for summarySize
 * evenly spaced elements covering chrom:start-end of an already opened
 * bigWig file.  This forwards to bbiSummaryArrayExtended, passing
 * bigWigIntervalQuery as the interval fetcher, and returns its result. */
{
return bbiSummaryArrayExtended(bwf, chrom, start, end,
        bigWigIntervalQuery, summarySize, summary);
}
boolean bigWigSummaryArrayExtended(char *fileName, char *chrom, bits32 start, bits32 end,
        int summarySize, struct bbiSummaryElement *summary)
/* Fill the summary array with extended summary information for summarySize
 * evenly spaced elements covering chrom:start-end of the bigWig file named
 * fileName.  The file is opened with bigWigFileOpen, queried through
 * bbiSummaryArrayExtended (with bigWigIntervalQuery as the interval fetcher)
 * and closed again before returning the query result. */
{
struct bbiFile *bwf = bigWigFileOpen(fileName);
boolean gotSummary = bbiSummaryArrayExtended(bwf, chrom, start, end,
        bigWigIntervalQuery, summarySize, summary);

/* Close the handle opened above regardless of the query outcome. */
bbiFileClose(&bwf);
return gotSummary;
}
boolean bbiSummaryArray(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end,
        BbiFetchIntervals fetchIntervals,
        enum bbiSummaryType summaryType, int summarySize, double *summaryValues)
/* Reduce the extended summary of chrom:start-end to one double per bin,
 * written into summaryValues[0..summarySize-1].
 *
 * Only bins whose extended summary has validCount > 0 are written; all other
 * entries of summaryValues are left untouched, so the caller should
 * pre-initialize the array with a default (typically 0.0 or nan("")).
 *
 * The value stored per bin depends on summaryType:
 *   bbiSumMean              - sumData / validCount
 *   bbiSumMax               - maxVal
 *   bbiSumMin               - minVal
 *   bbiSumCoverage          - validCount scaled by summarySize/(end - start)
 *   bbiSumStandardDeviation - calcStdFromSums(sumData, sumSquares, validCount)
 * Any other type triggers internalErr().
 *
 * Returns the result of bbiSummaryArrayExtended (documented as FALSE when
 * there is no data at that position). */
{
struct bbiSummaryElement *elements;
AllocArray(elements, summarySize);

boolean gotData = bbiSummaryArrayExtended(bbi, chrom, start, end,
        fetchIntervals, summarySize, elements);
if (gotData)
    {
    /* Factor used by the coverage summary: bins per base of the range. */
    double binsPerBase = (double)summarySize/(end - start);
    int binIx;
    for (binIx = 0; binIx < summarySize; ++binIx)
        {
        struct bbiSummaryElement *bin = &elements[binIx];
        double binValue;

        /* Bins without data keep the caller-supplied default. */
        if (!(bin->validCount > 0))
            continue;

        switch (summaryType)
            {
            case bbiSumMean:
                binValue = bin->sumData/bin->validCount;
                break;
            case bbiSumMax:
                binValue = bin->maxVal;
                break;
            case bbiSumMin:
                binValue = bin->minVal;
                break;
            case bbiSumCoverage:
                binValue = binsPerBase*bin->validCount;
                break;
            case bbiSumStandardDeviation:
                binValue = calcStdFromSums(bin->sumData, bin->sumSquares, bin->validCount);
                break;
            default:
                internalErr();
                binValue = 0.0;
                break;
            }
        summaryValues[binIx] = binValue;
        }
    }

freeMem(elements);
return gotData;
}
void doSummaryStatsBigWig(struct sqlConnection *conn) /* Put up page showing summary stats for bigWig track. */ { struct trackDb *track = curTrack; char *table = curTable; char *shortLabel = (track == NULL ? table : track->shortLabel); char *fileName = bigWigFileName(table, conn); long startTime = clock1000(); htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table); if (anySubtrackMerge(database, curTable)) hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this " "page (not implemented yet). Statistics shown here are only for " "the primary table %s (%s).</EM>", shortLabel, table); struct bbiFile *bwf = bigWigFileOpen(fileName); struct region *region, *regionList = getRegions(); double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0; bits64 validCount = 0; if (!anyFilter() && !anyIntersection()) { for (region = regionList; region != NULL; region = region->next) { struct bbiSummaryElement sum; if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end, bigWigIntervalQuery, 1, &sum)) { if (validCount == 0) { minVal = sum.minVal; maxVal = sum.maxVal; } else { if (sum.minVal < minVal) minVal = sum.minVal; if (sum.maxVal > maxVal) maxVal = sum.maxVal; } sumData += sum.sumData; sumSquares += sum.sumSquares; validCount += sum.validCount; } } } else { double ll, ul; enum wigCompare cmp; getWigFilter(database, curTable, &cmp, &ll, &ul); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(0); struct bbiInterval *iv, *ivList; ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm); for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; double size = iv->end - iv->start; if (validCount == 0) minVal = maxVal = val; else { if (val < minVal) minVal = val; if (val > maxVal) maxVal = val; } sumData += size*val; sumSquares += size*val*val; validCount += size; } lmCleanup(&lm); } } hTableStart(); floatStatRow("mean", sumData/validCount); floatStatRow("min", 
minVal); floatStatRow("max", maxVal); floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount)); numberStatRow("bases with data", validCount); long long regionSize = basesInRegion(regionList,0); long long gapTotal = gapsInRegion(conn, regionList,0); numberStatRow("bases with sequence", regionSize - gapTotal); numberStatRow("bases in region", regionSize); wigFilterStatRow(conn); stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off")); long wigFetchTime = clock1000() - startTime; floatStatRow("load and calc time", 0.001*wigFetchTime); hTableEnd(); bbiFileClose(&bwf); htmlClose(); }