double calcNormScoreFactor(char *fileName, int scoreCol)
/* Work out the multiplier that turns raw values in column scoreCol (zero-based)
 * of fileName into a nice browser score (0-1000).  The raw value mapped to 1000
 * is the mean plus one standard deviation, capped at the largest value seen. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *fields[scoreCol+1];
double total = 0, totalSquares = 0;
double lowest = 0, highest = 0;
int count = 0;

for (;;)
    {
    int fieldCount = lineFileChop(lf, fields);
    if (fieldCount == 0)
        break;
    lineFileExpectAtLeast(lf, scoreCol+1, fieldCount);
    double val = sqlDouble(fields[scoreCol]);
    if (count == 0)
        {
        /* First value seeds both extremes. */
        lowest = highest = val;
        }
    else
        {
        if (val < lowest)
            lowest = val;
        if (val > highest)
            highest = val;
        }
    total += val;
    totalSquares += val*val;
    count += 1;
    }
lineFileClose(&lf);

double std = calcStdFromSums(total, totalSquares, count);
double mean = total/count;
double top = mean + std;
if (top > highest)
    top = highest;
return 1000.0/top;
}
double twoStdsOverMean(struct bbiFile *bbi) /* Figure out what is two standard deviations over mean for a bigWig file. This is * an often useful threshold. */ { struct bbiSummaryElement sum = bbiTotalSummary(bbi); double mean = sum.sumData/sum.validCount; double std = calcStdFromSums(sum.sumData, sum.sumSquares, sum.validCount); return mean + 2*std; }
double bigWigStd(struct bbiFile* bw) /* return the mean value of a bigWig */ { double na = NANUM; struct bbiSummaryElement bbs = bbiTotalSummary(bw); if (bbs.validCount == 0) return na; else return calcStdFromSums(bbs.sumData, bbs.sumSquares, bbs.validCount); }
boolean bbiSummaryArray(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end,
	BbiFetchIntervals fetchIntervals,
	enum bbiSummaryType summaryType, int summarySize, double *summaryValues)
/* Fill in summaryValues with data from indicated chromosome range in bigWig file.
 * Be sure to initialize summaryValues to a default value, which will not be touched
 * for regions without data in file.  (Generally you want the default value to either
 * be 0.0 or nan("") depending on the application.)  Returns FALSE if no data
 * at that position. */
{
struct bbiSummaryElement *elements;
AllocArray(elements, summarySize);
boolean gotData = bbiSummaryArrayExtended(bbi, chrom, start, end, fetchIntervals,
	summarySize, elements);
if (gotData)
    {
    /* Scales a bin's valid-base count by summarySize/(end - start). */
    double covFactor = (double)summarySize/(end - start);
    int bin;
    for (bin = 0; bin < summarySize; ++bin)
        {
        struct bbiSummaryElement *el = &elements[bin];
        if (!(el->validCount > 0))
            continue;	/* No data in this bin: leave caller's default alone. */
        double val;
        switch (summaryType)
            {
            case bbiSumMean:
                val = el->sumData/el->validCount;
                break;
            case bbiSumMax:
                val = el->maxVal;
                break;
            case bbiSumMin:
                val = el->minVal;
                break;
            case bbiSumCoverage:
                val = covFactor*el->validCount;
                break;
            case bbiSumStandardDeviation:
                val = calcStdFromSums(el->sumData, el->sumSquares, el->validCount);
                break;
            default:
                internalErr();
                val = 0.0;
                break;
            }
        summaryValues[bin] = val;
        }
    }
freeMem(elements);
return gotData;
}
void bbiIntervalStatsReport(struct bbiInterval *bbList, char *table,
	char *chrom, bits32 start, bits32 end)
/* Write out little statistical report in HTML on stdout for the intervals in
 * bbList, relative to the window start-end.  The table and chrom parameters are
 * not used by this function.  An empty list produces no output. */
{
/* Nothing to report on; also avoids dereferencing a NULL list head below and
 * dividing by a zero item count. */
if (bbList == NULL)
    return;

/* Loop through list and calculate some stats. */
bits64 iCount = 0;
bits64 iTotalSize = 0;
bits32 biggestSize = 0, smallestSize = BIGNUM;
struct bbiInterval *bb;
double sum = 0.0, sumSquares = 0.0;
double minVal = bbList->val, maxVal = bbList->val;

for (bb = bbList; bb != NULL; bb = bb->next)
    {
    iCount += 1;
    bits32 size = bb->end - bb->start;
    iTotalSize += size;
    if (biggestSize < size)
        biggestSize = size;
    if (smallestSize > size)
        smallestSize = size;
    double val = bb->val;
    sum += val;
    sumSquares += val * val;
    if (minVal > val)
        minVal = val;
    if (maxVal < val)
        maxVal = val;
    }

char num1Buf[64], num2Buf[64];  /* big enough for 2^64 (and then some) */
sprintLongWithCommas(num1Buf, iCount);
sprintLongWithCommas(num2Buf, iTotalSize);
bits32 winSize = end-start;
printf("<B>Statistics on:</B> %s <B>items covering</B> %s bases (%4.2f%% coverage)<BR>\n",
	num1Buf, num2Buf, 100.0*iTotalSize/winSize);
printf("<B>Average item spans</B> %4.2f <B>bases.</B> ", (double)iTotalSize/iCount);

/* Span extremes are only worth printing when the items differ in size. */
if (biggestSize != smallestSize)
    {
    sprintLongWithCommas(num1Buf, smallestSize);
    sprintLongWithCommas(num2Buf, biggestSize);
    printf("<B>Minimum span</B> %s <B>maximum span</B> %s", num1Buf, num2Buf);
    }
printf("<BR>\n");

printf("<B>Average value</B> %g <B>min</B> %g <B>max</B> %g <B> standard deviation </B> %g<BR>\n",
	sum/iCount, minVal, maxVal, calcStdFromSums(sum, sumSquares, iCount));
}
void printStats(FILE *f, struct slDouble *list)
/* Write a tab followed by "ave+-std [min q1 median q3 max]" for the values in
 * list to f.  The five bracketed numbers come from slDoubleBoxWhiskerCalc. */
{
struct slDouble *el = list;
double total = 0, totalSquares = 0;
int n = 0;

while (el != NULL)
    {
    total += el->val;
    totalSquares += el->val * el->val;
    n += 1;
    el = el->next;
    }

double ave = total/n;
double std = calcStdFromSums(total, totalSquares, n);

double minVal, q1, median, q3, maxVal;
slDoubleBoxWhiskerCalc(list, &minVal, &q1, &median, &q3, &maxVal);

fprintf(f, "\t%g+-%g [%g %g %g %g %g]", ave, std, minVal, q1, median, q3, maxVal);
}
void aveNoQuartiles(char *fileName) /* aveNoQuartiles - Compute only min,max,mean,stdDev no quartiles */ { bits64 count = 0; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[128], *word; int wordCount; int wordIx = col-1; double sumData = 0.0, sumSquares = 0.0; double minVal = DBL_MAX, maxVal = -DBL_MAX; while ((wordCount = lineFileChop(lf, words)) > 0) { word = words[wordIx]; if (word[0] == '-' || isdigit(word[0])) { double val = sqlDouble(word); if (minVal > val) minVal = val; if (maxVal < val) maxVal = val; sumData += val; sumSquares += val * val; ++count; } } if (count == 0) errAbort("No numerical data column %d of %s", col, fileName); double average = sumData/count; double stdDev = calcStdFromSums(sumData, sumSquares, count); if (tableOut) { printf("# min max mean N sum stddev\n"); printf("%g %g %g %llu %g %g\n", minVal, maxVal, average, count, sumData, stdDev); } else { printf("average %f\n", average); printf("min %f\n", minVal); printf("max %f\n", maxVal); printf("count %llu\n", count); printf("total %f\n", sumData); printf("standard deviation %f\n", stdDev); } }
void bigBedDrawDense(struct track *tg, int seqStart, int seqEnd,
        struct hvGfx *hvg, int xOff, int yOff, int width,
        MgFont *font, Color color)
/* Use big-bed summary data to quickly draw bigBed.  For each of the width
 * columns whose summary has valid data a one pixel wide box of height
 * tg->heightPer is drawn.  Without a "denseCoverage" setting the boxes use
 * color; with it they are shaded gray according to the column's maxVal.
 * tg->summary is freed (and zeroed) on the way out. */
{
struct bbiSummaryElement *summary = tg->summary;
if (summary)
    {
    char *denseCoverage = trackDbSettingClosestToHome(tg->tdb, "denseCoverage");
    int x;
    if (denseCoverage == NULL)
        {
        /* Plain mode: every covered column gets the caller's color. */
        for (x=0; x<width; ++x)
            {
            if (summary[x].validCount > 0)
                hvGfxBox(hvg, x+xOff, yOff, 1, tg->heightPer, color);
            }
        }
    else
        {
        double startVal = 0, endVal = atof(denseCoverage);
        if (endVal <= 0)
            {
            /* Setting gives no positive upper bound: derive the gray range
             * from the whole-track summary instead. */
            struct bbiSummaryElement all = *tg->sumAll;
            double mean = all.sumData/all.validCount;
            double std = calcStdFromSums(all.sumData, all.sumSquares, all.validCount);
            rangeFromMinMaxMeanStd(0, all.maxVal, mean, std, &startVal, &endVal);
            }
        for (x=0; x<width; ++x)
            {
            if (summary[x].validCount > 0)
                {
                Color gray = shadesOfGray[grayInRange(summary[x].maxVal, startVal, endVal)];
                hvGfxBox(hvg, x+xOff, yOff, 1, tg->heightPer, gray);
                }
            }
        }
    }
freez(&tg->summary);
}
void bigWigInfo(char *fileName) /* bigWigInfo - Print out information about bigWig file.. */ { struct bbiFile *bwf = bigWigFileOpen(fileName); if (optionExists("minMax")) { struct bbiSummaryElement sum = bbiTotalSummary(bwf); printf("%f %f\n", sum.minVal, sum.maxVal); return; } printf("version: %d\n", bwf->version); printf("isCompressed: %s\n", (bwf->uncompressBufSize > 0 ? "yes" : "no")); printf("isSwapped: %d\n", bwf->isSwapped); printLabelAndLongNumber("primaryDataSize", bwf->unzoomedIndexOffset - bwf->unzoomedDataOffset); if (bwf->levelList != NULL) { long long indexEnd = bwf->levelList->dataOffset; printLabelAndLongNumber("primaryIndexSize", indexEnd - bwf->unzoomedIndexOffset); } printf("zoomLevels: %d\n", bwf->zoomLevels); if (optionExists("zooms")) { struct bbiZoomLevel *zoom; for (zoom = bwf->levelList; zoom != NULL; zoom = zoom->next) printf("\t%d\t%d\n", zoom->reductionLevel, (int)(zoom->indexOffset - zoom->dataOffset)); } struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf); printf("chromCount: %d\n", slCount(chromList)); if (optionExists("chroms")) for (chrom=chromList; chrom != NULL; chrom = chrom->next) printf("\t%s %d %d\n", chrom->name, chrom->id, chrom->size); struct bbiSummaryElement sum = bbiTotalSummary(bwf); printLabelAndLongNumber("basesCovered", sum.validCount); printf("mean: %f\n", sum.sumData/sum.validCount); printf("min: %f\n", sum.minVal); printf("max: %f\n", sum.maxVal); printf("std: %f\n", calcStdFromSums(sum.sumData, sum.sumSquares, sum.validCount)); }
char *printBigWigViewInfo(FILE *f, char *indent, struct view *view, struct composite *comp, struct taggedFile *tfList) /* Print out info for a bigWig view, including subtracks. */ { /* Look at all tracks in this view and calculate overall limits. */ double sumOfSums = 0, sumOfSumSquares = 0; bits64 sumOfN = 0; struct taggedFile *tf; for (tf = tfList; tf != NULL; tf = tf->next) { if (sameString(tf->manifest->outputType, view->name)) { char *relativeName = tf->manifest->fileName; char *path = relativeName; struct bbiFile *bbi = bigWigFileOpen(path); struct bbiSummaryElement sum = bbiTotalSummary(bbi); sumOfSums += sum.sumData; sumOfSumSquares += sum.sumSquares; sumOfN = sum.validCount; bigWigFileClose(&bbi); } } double mean = sumOfSums/sumOfN; double std = calcStdFromSums(sumOfSums, sumOfSumSquares, sumOfN); double clipMax = mean + 6*std; /* Output view stanza. */ char type[64]; safef(type, sizeof(type), "bigWig %g %g", 0.0, clipMax); fprintf(f, "%stype %s\n", indent, type); fprintf(f, "%sviewLimits 0:%g\n", indent, clipMax); fprintf(f, "%sminLimit 0\n", indent); fprintf(f, "%smaxLimit %g\n", indent, clipMax); fprintf(f, "%sautoScale off\n", indent); fprintf(f, "%smaxHeightPixels 100:32:16\n", indent); fprintf(f, "%swindowingFunction mean+whiskers\n", indent); return cloneString(type); }
void doSummaryStatsBigWig(struct sqlConnection *conn) /* Put up page showing summary stats for bigWig track. */ { struct trackDb *track = curTrack; char *table = curTable; char *shortLabel = (track == NULL ? table : track->shortLabel); char *fileName = bigWigFileName(table, conn); long startTime = clock1000(); htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table); if (anySubtrackMerge(database, curTable)) hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this " "page (not implemented yet). Statistics shown here are only for " "the primary table %s (%s).</EM>", shortLabel, table); struct bbiFile *bwf = bigWigFileOpen(fileName); struct region *region, *regionList = getRegions(); double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0; bits64 validCount = 0; if (!anyFilter() && !anyIntersection()) { for (region = regionList; region != NULL; region = region->next) { struct bbiSummaryElement sum; if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end, bigWigIntervalQuery, 1, &sum)) { if (validCount == 0) { minVal = sum.minVal; maxVal = sum.maxVal; } else { if (sum.minVal < minVal) minVal = sum.minVal; if (sum.maxVal > maxVal) maxVal = sum.maxVal; } sumData += sum.sumData; sumSquares += sum.sumSquares; validCount += sum.validCount; } } } else { double ll, ul; enum wigCompare cmp; getWigFilter(database, curTable, &cmp, &ll, &ul); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(0); struct bbiInterval *iv, *ivList; ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm); for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; double size = iv->end - iv->start; if (validCount == 0) minVal = maxVal = val; else { if (val < minVal) minVal = val; if (val > maxVal) maxVal = val; } sumData += size*val; sumSquares += size*val*val; validCount += size; } lmCleanup(&lm); } } hTableStart(); floatStatRow("mean", sumData/validCount); floatStatRow("min", 
minVal); floatStatRow("max", maxVal); floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount)); numberStatRow("bases with data", validCount); long long regionSize = basesInRegion(regionList,0); long long gapTotal = gapsInRegion(conn, regionList,0); numberStatRow("bases with sequence", regionSize - gapTotal); numberStatRow("bases in region", regionSize); wigFilterStatRow(conn); stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off")); long wigFetchTime = clock1000() - startTime; floatStatRow("load and calc time", 0.001*wigFetchTime); hTableEnd(); bbiFileClose(&bwf); htmlClose(); }