Ejemplo n.º 1
double calcNormScoreFactor(char *fileName, int scoreCol)
/* Figure out what to multiply things by to get a nice browser score (0-1000).
 * Reads the zero-based column scoreCol from every row of fileName and returns
 * 1000 divided by the smaller of (mean + one standard deviation) and the
 * largest value seen.  Aborts if the file contains no rows. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[scoreCol+1];
double sum = 0, sumSquares = 0;
double maxVal = 0;
int n = 0;
int fieldCount;
while ((fieldCount = lineFileChop(lf, row)) != 0)
    {
    lineFileExpectAtLeast(lf, scoreCol+1, fieldCount);
    double x = sqlDouble(row[scoreCol]);
    if (n == 0 || x > maxVal)
        maxVal = x;
    sum += x;
    sumSquares += x*x;
    n += 1;
    }
/* Release the file before doing anything that might abort. */
lineFileClose(&lf);

/* With no rows the mean would be 0/0, giving a useless NaN scale factor. */
if (n == 0)
    errAbort("No data rows in %s", fileName);

double std = calcStdFromSums(sum, sumSquares, n);
double mean = sum/n;
double highEnd = mean + std;
if (highEnd > maxVal)
    highEnd = maxVal;
/* NOTE(review): highEnd can still be zero or negative for some inputs,
 * which yields an infinite or negative factor - confirm callers only pass
 * files of positive scores. */
return 1000.0/highEnd;
}
Ejemplo n.º 2
double twoStdsOverMean(struct bbiFile *bbi)
/* Figure out what is two standard deviations over mean for a bigWig file.  This is
 * an often useful threshold. */
struct bbiSummaryElement sum = bbiTotalSummary(bbi);
double mean = sum.sumData/sum.validCount;
double std = calcStdFromSums(sum.sumData, sum.sumSquares, sum.validCount);
return mean + 2*std;
Ejemplo n.º 3
double bigWigStd(struct bbiFile* bw)
/* return the mean value of a bigWig */
    double na = NANUM;
    struct bbiSummaryElement bbs = bbiTotalSummary(bw);
    if (bbs.validCount == 0)
        return na;
        return calcStdFromSums(bbs.sumData, bbs.sumSquares, bbs.validCount);
boolean bbiSummaryArray(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end,
	BbiFetchIntervals fetchIntervals,
	enum bbiSummaryType summaryType, int summarySize, double *summaryValues)
/* Fill in summaryValues with  data from indicated chromosome range in bigWig file.
 * Be sure to initialize summaryValues to a default value, which will not be touched
 * for regions without data in file.  (Generally you want the default value to either
 * be 0.0 or nan("") depending on the application.)  Returns FALSE if no data
 * at that position.
 *
 * The range is summarized into summarySize elements by bbiSummaryArrayExtended,
 * and each element with at least one valid base is reduced to a single number
 * according to summaryType. */
{
struct bbiSummaryElement *elements;
AllocArray(elements, summarySize);
boolean ret = bbiSummaryArrayExtended(bbi, chrom, start, end, 
	fetchIntervals, summarySize, elements);
if (ret)
    {
    int i;
    /* Scales a valid-base count by summarySize/(end - start). */
    double covFactor = (double)summarySize/(end - start);
    for (i=0; i<summarySize; ++i)
        {
        struct bbiSummaryElement *el = &elements[i];
        if (el->validCount > 0)
            {
            double val;
            /* Each case must break: falling through would leave every
             * summary type with the value of the last case. */
            switch (summaryType)
                {
                case bbiSumMean:
                    val = el->sumData/el->validCount;
                    break;
                case bbiSumMax:
                    val = el->maxVal;
                    break;
                case bbiSumMin:
                    val = el->minVal;
                    break;
                case bbiSumCoverage:
                    val = covFactor*el->validCount;
                    break;
                case bbiSumStandardDeviation:
                    val = calcStdFromSums(el->sumData, el->sumSquares, el->validCount);
                    break;
                default:
                    /* Unrecognized summary type. */
                    val = 0.0;
                    break;
                }
            summaryValues[i] = val;
            }
        }
    }
/* The element array is only scratch space for this call. */
freeMem(elements);
return ret;
}
Ejemplo n.º 5
void bbiIntervalStatsReport(struct bbiInterval *bbList, char *table, 
	char *chrom, bits32 start, bits32 end)
/* Write out little statistical report in HTML to stdout: item count, bases
 * covered and percent coverage of the start-end window, average/minimum/maximum
 * item span, and the average, min, max and standard deviation of item values.
 * Writes nothing when bbList is empty.  The table and chrom parameters are
 * not used by this function. */
{
/* An empty list has no first value to seed min/max from and would also make
 * the averages divide by zero, so there is nothing to report. */
if (bbList == NULL)
    return;

/* Loop through list and calculate some stats. */
bits64 iCount = 0;
bits64 iTotalSize = 0;
bits32 biggestSize = 0, smallestSize = BIGNUM;
struct bbiInterval *bb;
double sum = 0.0, sumSquares = 0.0;
double minVal = bbList->val, maxVal = bbList->val;
for (bb = bbList; bb != NULL; bb = bb->next)
    {
    iCount += 1;
    bits32 size = bb->end - bb->start;
    iTotalSize += size;
    if (biggestSize < size)
        biggestSize = size;
    if (smallestSize > size)
        smallestSize = size;
    double val = bb->val;
    sum += val;
    sumSquares += val * val;
    if (minVal > val)
        minVal = val;
    if (maxVal < val)
        maxVal = val;
    }

char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */
sprintLongWithCommas(num1Buf, iCount);
sprintLongWithCommas(num2Buf, iTotalSize);
bits32 winSize = end-start;
printf("<B>Statistics on:</B> %s <B>items covering</B> %s bases (%4.2f%% coverage)<BR>\n",
	num1Buf, num2Buf, 100.0*iTotalSize/winSize);
printf("<B>Average item spans</B> %4.2f <B>bases.</B> ", (double)iTotalSize/iCount);
/* Min and max spans are only worth showing when items differ in size. */
if (biggestSize != smallestSize)
    {
    sprintLongWithCommas(num1Buf, smallestSize);
    sprintLongWithCommas(num2Buf, biggestSize);
    printf("<B>Minimum span</B> %s <B>maximum span</B> %s", num1Buf, num2Buf);
    }

printf("<B>Average value</B> %g <B>min</B> %g <B>max</B> %g <B> standard deviation </B> %g<BR>\n",
	sum/iCount, minVal, maxVal, calcStdFromSums(sum, sumSquares, iCount));
}
Ejemplo n.º 6
void printStats(FILE *f, struct slDouble *list)
/* Print out stats on list: ave +-std min 1/4 median 3/4 max */
int count = 0;
struct slDouble *el;
double sum=0, sumSquared=0;
for (el = list; el != NULL; el = el->next)
    sum += el->val;
    sumSquared += el->val * el->val;
    count += 1;
double ave = sum/count;
double std = calcStdFromSums(sum, sumSquared, count);
double minVal, q1, median, q3, maxVal;
slDoubleBoxWhiskerCalc(list, &minVal, &q1, &median, &q3, &maxVal);
fprintf(f, "\t%g+-%g [%g %g %g %g %g]", ave, std, minVal, q1, median, q3, maxVal);
Ejemplo n.º 7
void aveNoQuartiles(char *fileName)
/* aveNoQuartiles - Compute only min,max,mean,stdDev no quartiles */
bits64 count = 0;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[128], *word;
int wordCount;
int wordIx = col-1;
double sumData = 0.0, sumSquares = 0.0;
double minVal = DBL_MAX, maxVal = -DBL_MAX;

while ((wordCount = lineFileChop(lf, words)) > 0)
    word = words[wordIx];
    if (word[0] == '-' || isdigit(word[0]))
	double val = sqlDouble(word);
	if (minVal > val) minVal = val;
	if (maxVal < val) maxVal = val;
	sumData += val;
	sumSquares += val * val;
if (count == 0)
    errAbort("No numerical data column %d of %s", col, fileName);
double average = sumData/count;
double stdDev = calcStdFromSums(sumData, sumSquares, count);
if (tableOut)
    printf("# min max mean N sum stddev\n");
    printf("%g %g %g %llu %g %g\n",
	minVal, maxVal, average, count, sumData, stdDev);
    printf("average %f\n", average);
    printf("min %f\n", minVal);
    printf("max %f\n", maxVal);
    printf("count %llu\n", count);
    printf("total %f\n", sumData);
    printf("standard deviation %f\n", stdDev);
Ejemplo n.º 8
void bigBedDrawDense(struct track *tg, int seqStart, int seqEnd,
        struct hvGfx *hvg, int xOff, int yOff, int width,
        MgFont *font, Color color)
/* Use big-bed summary data to quickly draw bigBed.
 * Draws one box, one pixel wide and tg->heightPer tall, for every column x in
 * 0..width-1 whose tg->summary[x] has valid data; does nothing when
 * tg->summary is NULL.  When the "denseCoverage" trackDb setting is present
 * each box is shaded gray according to summary[x].maxVal, otherwise it is
 * drawn in 'color'.  seqStart, seqEnd and font are not used here. */
{
struct bbiSummaryElement *summary = tg->summary;
if (summary)
    {
    char *denseCoverage = trackDbSettingClosestToHome(tg->tdb, "denseCoverage");
    int x;
    if (denseCoverage != NULL)
        {
        /* Shade by value.  A non-positive setting means the upper limit
         * is derived from the whole-track summary instead. */
        double startVal = 0, endVal = atof(denseCoverage);
        if (endVal <= 0)
            {
            struct bbiSummaryElement sumAll = *tg->sumAll;
            double mean = sumAll.sumData/sumAll.validCount;
            double std = calcStdFromSums(sumAll.sumData, sumAll.sumSquares, sumAll.validCount);
            rangeFromMinMaxMeanStd(0, sumAll.maxVal, mean, std, &startVal, &endVal);
            }
        for (x=0; x<width; ++x)
            {
            if (summary[x].validCount > 0)
                {
                Color shade = shadesOfGray[grayInRange(summary[x].maxVal, startVal, endVal)];
                hvGfxBox(hvg, x+xOff, yOff, 1, tg->heightPer, shade);
                }
            }
        }
    else
        {
        /* No coverage setting: solid boxes in the caller's color.  This
         * must not run after the shaded pass or it would paint over it. */
        for (x=0; x<width; ++x)
            {
            if (summary[x].validCount > 0)
                hvGfxBox(hvg, x+xOff, yOff, 1, tg->heightPer, color);
            }
        }
    }
}
Ejemplo n.º 9
void bigWigInfo(char *fileName)
/* bigWigInfo - Print out information about bigWig file.. */
struct bbiFile *bwf = bigWigFileOpen(fileName);

if (optionExists("minMax"))
    struct bbiSummaryElement sum = bbiTotalSummary(bwf);
    printf("%f %f\n", sum.minVal, sum.maxVal);

printf("version: %d\n", bwf->version);
printf("isCompressed: %s\n", (bwf->uncompressBufSize > 0 ? "yes" : "no"));
printf("isSwapped: %d\n", bwf->isSwapped);
printLabelAndLongNumber("primaryDataSize", bwf->unzoomedIndexOffset - bwf->unzoomedDataOffset);
if (bwf->levelList != NULL)
    long long indexEnd = bwf->levelList->dataOffset;
    printLabelAndLongNumber("primaryIndexSize", indexEnd - bwf->unzoomedIndexOffset);
printf("zoomLevels: %d\n", bwf->zoomLevels);
if (optionExists("zooms"))
    struct bbiZoomLevel *zoom;
    for (zoom = bwf->levelList; zoom != NULL; zoom = zoom->next)
	printf("\t%d\t%d\n", zoom->reductionLevel, (int)(zoom->indexOffset - zoom->dataOffset));
struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf);
printf("chromCount: %d\n", slCount(chromList));
if (optionExists("chroms"))
    for (chrom=chromList; chrom != NULL; chrom = chrom->next)
	printf("\t%s %d %d\n", chrom->name, chrom->id, chrom->size);
struct bbiSummaryElement sum = bbiTotalSummary(bwf);
printLabelAndLongNumber("basesCovered", sum.validCount);
printf("mean: %f\n", sum.sumData/sum.validCount);
printf("min: %f\n", sum.minVal);
printf("max: %f\n", sum.maxVal);
printf("std: %f\n", calcStdFromSums(sum.sumData, sum.sumSquares, sum.validCount));
Ejemplo n.º 10
char *printBigWigViewInfo(FILE *f, char *indent, struct view *view, 
    struct composite *comp, struct taggedFile *tfList)
/* Print out info for a bigWig view, including subtracks. */
/* Look at all tracks in this view and calculate overall limits. */
double sumOfSums = 0, sumOfSumSquares = 0;
bits64 sumOfN = 0;
struct taggedFile *tf;
for (tf = tfList; tf != NULL; tf = tf->next)
    if (sameString(tf->manifest->outputType, view->name))
	char *relativeName = tf->manifest->fileName;
	char *path = relativeName;
	struct bbiFile *bbi = bigWigFileOpen(path);
	struct bbiSummaryElement sum = bbiTotalSummary(bbi);
	sumOfSums += sum.sumData;
	sumOfSumSquares += sum.sumSquares;
	sumOfN = sum.validCount;
double mean = sumOfSums/sumOfN;
double std = calcStdFromSums(sumOfSums, sumOfSumSquares, sumOfN);
double clipMax = mean + 6*std;

/* Output view stanza. */
char type[64];
safef(type, sizeof(type), "bigWig %g %g", 0.0, clipMax);
fprintf(f, "%stype %s\n", indent, type);
fprintf(f, "%sviewLimits 0:%g\n", indent, clipMax);
fprintf(f, "%sminLimit 0\n", indent);
fprintf(f, "%smaxLimit %g\n", indent, clipMax);
fprintf(f, "%sautoScale off\n", indent);
fprintf(f, "%smaxHeightPixels 100:32:16\n", indent);
fprintf(f, "%swindowingFunction mean+whiskers\n", indent);
return cloneString(type);
Ejemplo n.º 11
void doSummaryStatsBigWig(struct sqlConnection *conn)
/* Put up page showing summary stats for bigWig track. */
struct trackDb *track = curTrack;
char *table = curTable;
char *shortLabel = (track == NULL ? table : track->shortLabel);
char *fileName = bigWigFileName(table, conn);
long startTime = clock1000();

htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table);

if (anySubtrackMerge(database, curTable))
    hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this "
	    "page (not implemented yet).  Statistics shown here are only for "
	    "the primary table %s (%s).</EM>", shortLabel, table);

struct bbiFile *bwf = bigWigFileOpen(fileName);
struct region *region, *regionList = getRegions();
double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0;
bits64 validCount = 0;

if (!anyFilter() && !anyIntersection())
    for (region = regionList; region != NULL; region = region->next)
	struct bbiSummaryElement sum;
	if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end,
		bigWigIntervalQuery, 1, &sum))
	    if (validCount == 0)
		minVal = sum.minVal;
		maxVal = sum.maxVal;
		if (sum.minVal < minVal)
		    minVal = sum.minVal;
		if (sum.maxVal > maxVal)
		    maxVal = sum.maxVal;
	    sumData += sum.sumData;
	    sumSquares += sum.sumSquares;
	    validCount += sum.validCount;
    double ll, ul;
    enum wigCompare cmp;
    getWigFilter(database, curTable, &cmp, &ll, &ul);
    for (region = regionList; region != NULL; region = region->next)
	struct lm *lm = lmInit(0);
	struct bbiInterval *iv, *ivList;
	ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
	for (iv = ivList; iv != NULL; iv = iv->next)
	    double val = iv->val;
	    double size = iv->end - iv->start;
	    if (validCount == 0)
		minVal = maxVal = val;
		if (val < minVal)
		    minVal = val;
		if (val > maxVal)
		    maxVal = val;
	    sumData += size*val;
	    sumSquares += size*val*val;
	    validCount += size;

floatStatRow("mean", sumData/validCount);
floatStatRow("min", minVal);
floatStatRow("max", maxVal);
floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount));
numberStatRow("bases with data", validCount);
long long regionSize = basesInRegion(regionList,0);
long long gapTotal = gapsInRegion(conn, regionList,0);
numberStatRow("bases with sequence", regionSize - gapTotal);
numberStatRow("bases in region", regionSize);
stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
long wigFetchTime = clock1000() - startTime;
floatStatRow("load and calc time", 0.001*wigFetchTime);
