Example #1
0
void doSummaryStatsWiggle(struct sqlConnection *conn)
/* Put up page showing summary stats for wiggle track. */
{
// grab the right trackDb for the current table.  The curTrack variable
// has the composite trackDb in it
struct trackDb *track  = hTrackDbForTrack(database, curTable);

char *table = curTable;
struct region *region, *regionList = getRegions();
char *regionName = getRegionName();
long long regionSize = 0;
long long gapTotal = 0;
long startTime = 0, wigFetchTime = 0;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int regionCount = 0;
int regionsDone = 0;
unsigned span = 0;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
boolean hasConstraint = FALSE;
char *table2 = NULL;
boolean fullGenome = FALSE;
boolean statsHeaderDone = FALSE;
boolean gotSome = FALSE;
char *shortLabel = table;
long long statsItemCount = 0;	/*	global accumulators for overall */
int statsSpan = 0;		/*	stats summary on a multiple region */
double statsSumData = 0.0;	/*	output */
double statsSumSquares = 0.0;		/*	"  "	*/
double lowerLimit = INFINITY;		/*	"  "	*/
double upperLimit = -1.0 * INFINITY;	/*	"  "	*/

startTime = clock1000();
if (track != NULL)
     shortLabel = track->shortLabel;

/*	Count the regions, when only one, we can do more stats */
for (region = regionList; region != NULL; region = region->next)
    ++regionCount;

htmlOpen("%s (%s) Wiggle Summary Statistics", shortLabel, table);

if (anySubtrackMerge(database, curTable))
    hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this "
	    "page (not implemented yet).  Statistics shown here are only for "
	    "the primary table %s (%s).</EM>", shortLabel, table);

fullGenome = fullGenomeRegion();

WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

for (region = regionList; region != NULL; region = region->next)
    {
    struct bed *intersectBedList = NULL;
    int operations;

    ++regionsDone;

    if (table2)
	intersectBedList = bedTable2(conn, region, table2);

    operations = wigFetchStats;
#if defined(NOT)
    /*	can't do the histogram now, that operation times out	*/
    if (1 == regionCount)
	operations |= wigFetchAscii;
#endif

    wds->setChromConstraint(wds, region->chrom);

    if (fullGenome)
	wds->setPositionConstraint(wds, 0, 0);
    else
	wds->setPositionConstraint(wds, region->start, region->end);

    if (hasConstraint)
	wds->setDataConstraint(wds, dataConstraint, ll, ul);

    /* depending on what is coming in on regionList, we may need to be
     * smart about how often we call getData for these custom tracks
     * since that is potentially a large file read each time.
     */
    if (isCustom)
	{
	if (ct->dbTrack)
	    {
	    struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	    span = minSpan(trashConn, splitTableOrFileName, region->chrom,
		region->start, region->end, cart, tdb);
	    wds->setSpanConstraint(wds, span);
	    valuesMatched = getWigglePossibleIntersection(wds, region,
		CUSTOM_TRASH, table2, &intersectBedList,
		    splitTableOrFileName, operations);
	    hFreeConn(&trashConn);
	    }
	else
	    {
	    valuesMatched = getWigglePossibleIntersection(wds, region, NULL,
		table2, &intersectBedList, splitTableOrFileName, operations);

	/*  XXX We need to properly get the smallest span for custom tracks */
	    /*	This is not necessarily the correct answer here	*/
	    if (wds->stats)
		span = wds->stats->span;
	    else
		span = 1;
	    }
	}
    else
	{
	if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
	    {
	    span = minSpan(conn, splitTableOrFileName, region->chrom,
		region->start, region->end, cart, track);
	    wds->setSpanConstraint(wds, span);
	    valuesMatched = getWigglePossibleIntersection(wds, region,
		database, table2, &intersectBedList, splitTableOrFileName,
		    operations);
	    if (intersectBedList)
		span = 1;
	    }
	}
    /*	when doing multiple regions, we need to print out each result as
     *	it happens to keep the connection open to the browser and
     *	prevent any timeout since this could take a while.
     *	(worst case test is quality track on panTro1)
     */
    if (wds->stats)
	statsItemCount += wds->stats->count;
    if (wds->stats && (regionCount > 1) && (valuesMatched > 0))
	{
	double sumData = wds->stats->mean * wds->stats->count;
	double sumSquares;

	if (wds->stats->count > 1)
	    sumSquares = (wds->stats->variance * (wds->stats->count - 1)) +
		((sumData * sumData)/wds->stats->count);
	else
	    sumSquares = sumData * sumData;

	/*	global accumulators for overall summary	*/
	statsSpan = wds->stats->span;
	statsSumData += sumData;
	statsSumSquares += sumSquares;
	if (wds->stats->lowerLimit < lowerLimit)
	    lowerLimit = wds->stats->lowerLimit;
	if ((wds->stats->lowerLimit + wds->stats->dataRange) > upperLimit)
	    upperLimit = wds->stats->lowerLimit + wds->stats->dataRange;

	if (statsHeaderDone)
	    wds->statsOut(wds, database, "stdout", TRUE, TRUE, FALSE, TRUE);
	else
	    {
	    wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, TRUE);
	    statsHeaderDone = TRUE;
	    }
	wds->freeStats(wds);
	gotSome = TRUE;
	}
    if ((regionCount > MAX_REGION_DISPLAY) &&
		(regionsDone >= MAX_REGION_DISPLAY))
	{
	hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> Can not display more "
	    "than %d regions, <BR> would take too much time </TH></TR>\n",
		MAX_REGION_DISPLAY);
	break;	/*	exit this for loop	*/
	}
    }	/*for (region = regionList; region != NULL; region = region->next) */

if (hasConstraint)
    freeMem(dataConstraint);	/* been cloned into wds */

if (1 == regionCount)
    {
    statsPreamble(wds, regionList->chrom, regionList->start, regionList->end,
	span, valuesMatched, table2);
    /* 3 X TRUE = sort results, html table output, with header,
     *	the FALSE means close the table after printing, no more rows to
     *	come.  The case in the if() statement was already taken care of
     *	in the statsPreamble() printout.  No need to do that again.
     */

    if ( ! ((valuesMatched == 0) && table2) )
	wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, FALSE);
    regionSize = basesInRegion(regionList,0);
    gapTotal = gapsInRegion(conn, regionList,0);
    }
else
    {	/* this is a bit of a kludge here since these printouts are done in the
	 *	library source wigDataStream.c statsOut() function and
	 *	this is a clean up of that.  That function should be
	 *	pulled out of there and made independent and more
	 *	versatile.
	 */
    long long realSize;
    double variance;
    double stddev;

    /*	Too expensive to lookup the numbers for thousands of regions */
    regionSize = basesInRegion(regionList,MAX_REGION_DISPLAY);
    gapTotal = gapsInRegion(conn, regionList,MAX_REGION_DISPLAY);
    realSize = regionSize - gapTotal;

    /*	close the table which was left open in the loop above	*/
    if (!gotSome)
	hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> No data found matching this request </TH></TR>\n");

    hPrintf("<TR><TH ALIGN=LEFT> SUMMARY: </TH>\n");
    hPrintf("\t<TD> &nbsp; </TD>\n");	/*	chromStart	*/
    hPrintf("\t<TD> &nbsp; </TD>\n");	/*	chromEnd	*/
    hPrintf("\t<TD ALIGN=RIGHT> ");
    printLongWithCommas(stdout, statsItemCount);
    hPrintf(" </TD>\n" );
    hPrintf("\t<TD ALIGN=RIGHT> %d </TD>\n", statsSpan);
    hPrintf("\t<TD ALIGN=RIGHT> ");
    printLongWithCommas(stdout, statsItemCount*statsSpan);
    hPrintf("&nbsp;(%.2f%%) </TD>\n",
	100.0*(double)(statsItemCount*statsSpan)/(double)realSize);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", lowerLimit);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit - lowerLimit);
    if (statsItemCount > 0)
	hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", statsSumData/statsItemCount);
    else
	hPrintf("\t<TD ALIGN=RIGHT> 0.0 </TD>\n");
    stddev = 0.0;
    variance = 0.0;
    if (statsItemCount > 1)
	{
	variance = (statsSumSquares -
	    ((statsSumData * statsSumData)/(double) statsItemCount)) /
		(double) (statsItemCount - 1);
	if (variance > 0.0)
	    stddev = sqrt(variance);
	}
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", variance);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", stddev);
    hPrintf("</TR>\n");
    wigStatsTableHeading(stdout, TRUE);
    hPrintf("</TABLE></TD></TR></TABLE></P>\n");
    }


#if defined(NOT)
/*	can't do the histogram now, that operation times out	*/
/*	Single region, we can do the histogram	*/
if ((valuesMatched > 1) && (1 == regionCount))
    {
    float *valuesArray = NULL;
    size_t valueCount = 0;
    struct histoResult *histoGramResult;

    /*	convert the ascii data listings to one giant float array 	*/
    valuesArray = wds->asciiToDataArray(wds, valuesMatched, &valueCount);

    /*	histoGram() may return NULL if it doesn't work	*/

    histoGramResult = histoGram(valuesArray, valueCount,
	    NAN, (unsigned) 0, NAN, (float) wds->stats->lowerLimit,
		(float) (wds->stats->lowerLimit + wds->stats->dataRange),
		(struct histoResult *)NULL);

    printHistoGram(histoGramResult, TRUE);	/* TRUE == html output */

    freeHistoGram(&histoGramResult);
    wds->freeAscii(wds);
    wds->freeArray(wds);
    }
#endif

wds->freeStats(wds);
wiggleDataStreamFree(&wds);

wigFetchTime = clock1000() - startTime;
webNewSection("Region and Timing Statistics");
hTableStart();
stringStatRow("region", regionName);
numberStatRow("bases in region", regionSize);
numberStatRow("bases in gaps", gapTotal);
floatStatRow("load and calc time", 0.001*wigFetchTime);
wigFilterStatRow(conn);
stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
hTableEnd();
htmlClose();
}	/*	void doSummaryStatsWiggle(struct sqlConnection *conn)	*/
Example #2
0
static int wigOutRegion(char *table, struct sqlConnection *conn,
	struct region *region, int maxOut, enum wigOutputType wigOutType,
	struct wigAsciiData **data, int spanConstraint)
/* Write out wig data in region.  Write up to maxOut elements.
 * Returns number of elements written. */
{
int linesOut = 0;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchAscii;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;

switch (wigOutType)
    {
    case wigOutBed:
	operations = wigFetchBed;
	break;
    default:
    case wigDataNoPrint:
    case wigOutData:
	operations = wigFetchAscii;
	break;
    };

WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

if (hasConstraint)
    freeMem(dataConstraint);	/* been cloned into wds */

wds->setMaxOutput(wds, maxOut);
wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);

if (table2)
    intersectBedList = bedTable2(conn, region, table2);

if (isCustom)
    {
    if (ct->dbTrack)
	{
	if (spanConstraint)
	    wds->setSpanConstraint(wds,spanConstraint);
	else
	    {
	    struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	    unsigned span = minSpan(trashConn, splitTableOrFileName,
		region->chrom, region->start, region->end, cart, tdb);
	    wds->setSpanConstraint(wds, span);
	    hFreeConn(&trashConn);
	    }
	valuesMatched = getWigglePossibleIntersection(wds, region,
	    CUSTOM_TRASH, table2, &intersectBedList,
		splitTableOrFileName, operations);
	}
    else
	valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
	    &intersectBedList, splitTableOrFileName, operations);
    }
else
    {
    if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
	{
	/* XXX TBD, watch for a span limit coming in as an SQL filter */
	if (intersectBedList)
	    {
	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	    unsigned span;
	    span = minSpan(conn, splitTableOrFileName, region->chrom,
		region->start, region->end, cart, tdb);
	    wds->setSpanConstraint(wds, span);
	    }
	else if (spanConstraint)
	    wds->setSpanConstraint(wds,spanConstraint);

	valuesMatched = getWigglePossibleIntersection(wds, region, database,
	    table2, &intersectBedList, splitTableOrFileName, operations);
	}
    }

switch (wigOutType)
    {
    case wigDataNoPrint:
	if (data)
	    {
	    if (*data != NULL)	/* no exercise of this function yet	*/
		{	/*	data not null, add to existing list	*/
		struct wigAsciiData *asciiData;
		struct wigAsciiData *next;
		for (asciiData = *data; asciiData; asciiData = next)
		    {
		    next = asciiData->next;
		    slAddHead(&wds->ascii, asciiData);
		    }
		}
	    wds->sortResults(wds);
	    *data = wds->ascii;	/* moving the list to *data */
	    wds->ascii = NULL;	/* gone as far as wds is concerned */
	    }
	    linesOut = valuesMatched;
	break;
    case wigOutBed:
	linesOut = wds->bedOut(wds, "stdout", TRUE);/* TRUE == sort output */
	break;
    default:
    case wigOutData:
	linesOut = wds->asciiOut(wds, database, "stdout", TRUE, FALSE);
	break;		/* TRUE == sort output, FALSE == not raw data out */
    };

wiggleDataStreamFree(&wds);

return linesOut;
}	/*	static int wigOutRegion()	*/
Example #3
0
struct bed *getWiggleAsBed(
    char *db, char *table, 	/* Database and table. */
    struct region *region,	/* Region to get data for. */
    char *filter, 		/* Filter to add to SQL where clause if any. */
    struct hash *idHash, 	/* Restrict to id's in this hash if non-NULL. */
    struct lm *lm,		/* Where to allocate memory. */
    struct sqlConnection *conn)	/* SQL connection to work with */
/* Return a bed list of all items in the given range in table.
 * Cleanup result via lmCleanup(&lm) rather than bedFreeList.  */
/* filter, idHash and lm are currently unused, perhaps future use	*/
{
struct bed *bedList=NULL;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchBed;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;
int maxOut;

WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

if (hasConstraint)
    freeMem(dataConstraint);	/* been cloned into wds */

maxOut = bigFileMaxOutput();

wds->setMaxOutput(wds, maxOut);

wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);

if (table2)
    intersectBedList = bedTable2(conn, region, table2);

if (isCustom)
    {
    if (ct->dbTrack)
	{
	unsigned span = 0;
	struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
	struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	valuesMatched = getWigglePossibleIntersection(wds, region,
	    CUSTOM_TRASH, table2, &intersectBedList,
		splitTableOrFileName, operations);
	span = minSpan(trashConn, splitTableOrFileName, region->chrom,
	    region->start, region->end, cart, tdb);
	wds->setSpanConstraint(wds, span);
	hFreeConn(&trashConn);
	}
    else
	valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
	    &intersectBedList, splitTableOrFileName, operations);
    }
else
    {
    if (conn == NULL)
	errAbort( "getWiggleAsBed: NULL conn given for database table");

    if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
	{
	struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	unsigned span = 0;

	/* XXX TBD, watch for a span limit coming in as an SQL filter */
	span = minSpan(conn, splitTableOrFileName, region->chrom,
	    region->start, region->end, cart, tdb);
	wds->setSpanConstraint(wds, span);

	valuesMatched = getWigglePossibleIntersection(wds, region, database,
	    table2, &intersectBedList, splitTableOrFileName, operations);
	}
    }

if (valuesMatched > 0)
    {
    struct bed *bed;

    wds->sortResults(wds);
    for (bed = wds->bed; bed != NULL; bed = bed->next)
	{
	struct bed *copy = lmCloneBed(bed, lm);
	slAddHead(&bedList, copy);
	}
    slReverse(&bedList);
    }

wiggleDataStreamFree(&wds);

return bedList;
}	/*	struct bed *getWiggleAsBed()	*/
Example #4
0
void genericWiggleClick(struct sqlConnection *conn, struct trackDb *tdb, 
	char *item, int start)
/* Display details for Wiggle data tracks.
 *	conn may be NULL for custom tracks when from file */
{
char *chrom = cartString(cart, "c");
char table[64];
boolean hasBin;
unsigned span = 0;
struct wiggleDataStream *wds = wiggleDataStreamNew();
unsigned long long valuesMatched = 0;
struct histoResult *histoGramResult;
float *valuesArray = NULL;
size_t valueCount = 0;
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
int operations = wigFetchStats;	/*	default operation */

if (startsWith("ct_", tdb->table))
    {
    ct = lookupCt(tdb->table);
    if (!ct)
        {
        warn("<P>wiggleClick: can not find custom wiggle track '%s'</P>", tdb->table);
        return;
        }
    if (! ct->wiggle)
        {
        warn("<P>wiggleClick: called to do stats on a custom track that isn't wiggle data ?</P>");
        return;
        }
    if (ct->dbTrack)
	{
	safef(table,ArraySize(table), "%s", ct->dbTableName);
	span = minSpan(conn, table, chrom, winStart, winEnd, cart, tdb);
	}
    else
	{
	safef(table,ArraySize(table), "%s", ct->wigFile);
	span = 0;	/*	cause all spans to be examined	*/
	}
    isCustom = TRUE;
    }
else
    {
    hFindSplitTable(database, seqName, tdb->table, table, &hasBin);
    /*span = spanInUse(conn, table, chrom, winStart, winEnd, cart);*/
    span = minSpan(conn, table, chrom, winStart, winEnd, cart, tdb);
    }

/*	if for some reason we don't have a chrom and win positions, this
 *	should be run in a loop that does one chrom at a time.  In the
 *	case of hgc, there seems to be a chrom and a position.
 */
wds->setSpanConstraint(wds, span);
wds->setChromConstraint(wds, chrom);
wds->setPositionConstraint(wds, winStart, winEnd);

/*	If our window is less than some number of points, we can do
 *	the histogram too.
 */
#define MAX_WINDOW_ALLOW_STATS	100000001
#define MAX_WINDOW_ALLOW_STRING	"100,000,000"
if ((winEnd - winStart) < MAX_WINDOW_ALLOW_STATS)
	operations |= wigFetchAscii;

/*	We want to also fetch the actual data values so we can run a
 *	histogram function on them.  You can't fetch the data in the
 *	form of the data array since the span information is then lost.
 *	We have to do the ascii data list format, and prepare that to
 *	send to the histogram function.
 */

if (isCustom)
    {
    if (ct->dbTrack)
	valuesMatched = wds->getData(wds, CUSTOM_TRASH, table, operations);
    else
	valuesMatched = wds->getData(wds, (char *)NULL, table, operations);
    }
else
    valuesMatched = wds->getData(wds, database, table, operations);

statsPreamble(wds, chrom, winStart, winEnd, span, valuesMatched, NULL);

/*	output statistics table
 *		(+sort, +html output, +with header, +close table)
 */
wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, FALSE);

if ((winEnd - winStart) < MAX_WINDOW_ALLOW_STATS)
    {
    char *words[16];
    int wordCount = 0;
    char *dupe = cloneString(tdb->type);
    double minY, maxY, tDbMinY, tDbMaxY;
    float hMin, hMax, hRange;

    wordCount = chopLine(dupe, words);

    wigFetchMinMaxY(tdb, &minY, &maxY, &tDbMinY, &tDbMaxY, wordCount, words);
    hMin = min(minY,tDbMinY);
    hMax = max(maxY,tDbMaxY);
    hRange = hMax - hMin;

    /*	convert the ascii data listings to one giant float array 	*/
    valuesArray = wds->asciiToDataArray(wds, valuesMatched, &valueCount);

    /* let's see if we really want to use the range from the track type
     *	line, or the actual range in this data.  If there is a good
     *	actual range in the data, use that instead
     */
    if (hRange > 0.0) 
    	{
	if (wds->stats->dataRange != 0)
	    hRange = 0.0;
	}

    /*	If we have a valid range, use a specified 20 bin histogram
     *	NOTE: pass 21 as binCount to get a 20 bin histogram
     */
    if (hRange > 0.0)
	histoGramResult = histoGram(valuesArray, valueCount, (hRange/20.0),
	    (unsigned) 21, hMin, hMin, hMax, (struct histoResult *)NULL);
    else
	histoGramResult = histoGram(valuesArray, valueCount,
	    NAN, (unsigned) 0, NAN, (float) wds->stats->lowerLimit,
		(float) (wds->stats->lowerLimit + wds->stats->dataRange),
		    (struct histoResult *)NULL);

    /*	histoGram() may return NULL if it doesn't work, that's OK, the
     *	print out will indicate no results  (TRUE == html output)
     */
    printHistoGram(histoGramResult, TRUE);

    freeHistoGram(&histoGramResult);
    freeMem(valuesArray);
    }
else
    {
    printf("<P>(viewing windows of fewer than %s bases will also"
	" display a histogram)</P>\n", MAX_WINDOW_ALLOW_STRING);
    }

wiggleDataStreamFree(&wds);
}