Exemple #1
0
static void altSplicePrint(struct section *section,
	struct sqlConnection *conn, char *geneId)
/* Print out altSplicing info. */
{
char *altId = section->items;
char query[256];
struct sqlResult *sr;
char **row;
struct altGraphX *ag;
char table[64];
boolean hasBin;

hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, "altGraphX", table, &hasBin);
sqlSafef(query, sizeof(query), "select * from %s where name='%s'", table, altId);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    ag = altGraphXLoad(row+hasBin);
    hPrintf("<TABLE><TR><TD BGCOLOR='#888888'>\n");
    altGraphXMakeImage(ag);
    hPrintf("</TD></TR></TABLE><BR>");
    }
sqlFreeResult(&sr);
hPrintf("This graph shows alternative splicing observed in mRNAs and "
        "ESTs that is either conserved in mouse, present in full length "
	"mRNAs, or observed at least three times in ESTs.");
}
Exemple #2
0
static struct slName *getExamples(char *db, struct sqlConnection *conn,
				  char *table, char *field, int count)
/* Return a list of example values of table.field; count is handed to the
 * sampler that matches the table's type (BAM, bigBed, VCF or plain SQL).
 * For the file-backed types the field name is asserted to be the id column
 * that the sampler reads. */
{
boolean isTabix = FALSE;

if (isBamTable(table))
    {
    assert(sameString(field, "qName"));
    return randomBamIds(table, conn, count);
    }

if (isBigBed(db, table, curTrack, ctLookupName))
    {
    assert(sameString(field, "name"));
    return randomBigBedIds(table, conn, count);
    }

if (isVcfTable(table, &isTabix))
    {
    assert(sameString(field, "id"));
    return randomVcfIds(table, conn, count, isTabix);
    }

/* Plain SQL table.  A name containing '.' is used verbatim; otherwise ask
 * hFindSplitTable() for the real table name and fall back to the given name
 * when it cannot be resolved. */
char resolved[HDB_MAX_TABLE_STRING];
boolean useAsGiven = (strchr(table, '.') != NULL);
if (!useAsGiven)
    useAsGiven = !hFindSplitTable(database, NULL, table, resolved, NULL);
if (useAsGiven)
    safecpy(resolved, sizeof(resolved), table);
return sqlRandomSampleConn(conn, resolved, field, count);
}
Exemple #3
0
void orTable(char *database, Bits *acc, char *track, char *chrom, 
	int chromSize, struct sqlConnection *conn)
/* Or in table if it exists.  Else do nothing.
 * The track part of the spec is isolated first; when it contains a '.' it is
 * treated as a file and handed to orFile(), otherwise it is resolved as a
 * (possibly split) database table for chrom. */
{
char t[512], *s;
/* Start empty so the buffer holds a valid string even when
 * hFindSplitTable() fails and never writes to it. */
char table[HDB_MAX_TABLE_STRING] = "";

isolateTrackPartOfSpec(track, t);
s = strrchr(t, '.');
if (s != NULL)
    {
    orFile(acc, track, chrom, chromSize);
    }
else
    {
    boolean hasBin = FALSE;
    int minFeatureSize = optionInt("minFeatureSize", 0);
    /* The value stored in isSplit is hFindSplitTable()'s success flag; the
     * name is kept because the verbose output below prints it that way. */
    boolean isSplit = hFindSplitTable(database, chrom, t, table, &hasBin);
    /* Only probe for the table when the lookup produced a name; previously
     * an uninitialized buffer was passed to hTableExists() on failure. */
    boolean isFound = isSplit && hTableExists(database, table);
    verbose(3,"orTable: db: %s isFound: %s isSplit: %s %s %s %s\n", database,
	isFound ? "TRUE" : "FALSE",
	    isSplit ? "TRUE" : "FALSE", chrom, t, table );
    if (isFound)
	fbOrTableBitsQueryMinSize(database, acc, track, chrom, chromSize, conn, where,
		   TRUE, TRUE, minFeatureSize);
    }
}
Exemple #4
0
static struct genePred *getCurGenePred(struct sqlConnection *conn)
/* Return current gene in genePred.
 * Selects the row of the knownGene track table that matches curGeneId,
 * curGeneChrom, curGeneStart and curGeneEnd, and also sets the global
 * curAlignId (from the alignId column when the table has one, otherwise
 * from the gene name).  Aborts when the track table or the row is missing. */
{
char *track = genomeSetting("knownGene");
char table[HDB_MAX_TABLE_STRING];
boolean hasBin;
char query[256];
struct sqlResult *sr;
char **row;
struct genePred *gp = NULL;
if (!hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, track, table, sizeof table, &hasBin))
    errAbort("track %s not found", track);
bool hasAttrId = sqlColumnExists(conn, table, "alignId");
sqlSafef(query, sizeof(query),
	"select * from %s where name = '%s' "
	"and chrom = '%s' and txStart=%d and txEnd=%d"
	, table, curGeneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    /* Every field access is made relative to the first non-bin column so
     * that the fixed index below stays aligned with what genePredLoad()
     * sees.  Indexing row[] directly would be off by one with a bin column. */
    char **fields = row + hasBin;
    gp = genePredLoad(fields);

#define  ALIGNIDFIELD      11  // Gencode Id
    if (hasAttrId)
	curAlignId = cloneString(fields[ALIGNIDFIELD]);
    else
	curAlignId = gp->name;	/* shares storage with gp, not a copy */
    }
sqlFreeResult(&sr);
if (gp == NULL)
    errAbort("getCurGenePred: Can't find %s", query);
return gp;
}
Exemple #5
0
void doFlyreg(struct trackDb *tdb, char *item)
/* flyreg.org: Drosophila DNase I Footprint db.
 * Prints the details page for one item: factor, target, (for the flyreg2
 * table) footprint id, PubMed link and position, followed by a motif
 * section when the flyregMotif table exists and has a motif for item.
 * Aborts when the track table is not found or the item query has no row. */
{
struct dyString *sql = newDyString(256);
struct sqlConnection *conn = hAllocConn(database);
int start = cartInt(cart, "o");
int end   = cartInt(cart, "t");
boolean isVersion2 = sameString(tdb->table, "flyreg2");
char *motifTable = "flyregMotif";
struct dnaMotif *motif = NULL;
struct sqlResult *sr = NULL;
char **row = NULL;
char fullTable[HDB_MAX_TABLE_STRING];
boolean hasBin = FALSE;

genericHeader(tdb, item);
if (!hFindSplitTable(database, seqName, tdb->table, fullTable, sizeof fullTable, &hasBin))
    errAbort("track %s not found", tdb->table);

/* The where clause is assembled in three pieces: chrom, whatever
 * hAddBinToQuery() appends for the start/end range, then the exact
 * start and name match. */
sqlDyStringPrintf(sql, "select * from %s where chrom = '%s' and ",
	       fullTable, seqName);
hAddBinToQuery(start, end, sql);
sqlDyStringPrintf(sql, "chromStart = %d and name = '%s'", start, item);

sr = sqlGetResult(conn, sql->string);
row = sqlNextRow(sr);
if (row == NULL)
    errAbort("query returned no results: \"%s\"", sql->string);
else
    {
    /* A flyreg2 record is used for both table versions; the older loader
     * is given the same storage through a cast. */
    struct flyreg2 fr;
    char **fields = row + hasBin;

    if (isVersion2)
	flyreg2StaticLoad(fields, &fr);
    else
	flyregStaticLoad(fields, (struct flyreg *)(&fr));

    printf("<B>Factor:</B> %s<BR>\n", fr.name);
    printf("<B>Target:</B> %s<BR>\n", fr.target);
    if (isVersion2)
	printf("<B>Footprint ID:</B> %06d<BR>\n", fr.fpid);
    printf("<B>PubMed ID:</B> <A HREF=\"");
    printEntrezPubMedUidUrl(stdout, fr.pmid);
    printf("\" TARGET=_BLANK>%d</A><BR>\n", fr.pmid);
    bedPrintPos((struct bed *)(&fr), 3, tdb);

    if (hTableExists(database, motifTable))
	motif = loadDnaMotif(item, motifTable);
    if (motif != NULL)
	motifHitSection(NULL, motif);
    }

dyStringFree(&sql);
sqlFreeResult(&sr);
hFreeConn(&conn);

/* A motif section was opened above, so start a new section for the
 * track description. */
if (motif != NULL)
    webNewSection("%s",tdb->longLabel);
printTrackHtml(tdb);
}
Exemple #6
0
struct wiggleDataStream *wigChromRawStats(char *chrom)
/* Fetch stats for wig data in chrom.
 * Works on the global curTable with the wigFetchRawStats operation.
 * Custom tracks are read from CUSTOM_TRASH (database-backed) or from their
 * wig file; any other table is resolved with hFindSplitTable().  When that
 * lookup fails the stream is returned without any data fetched.
 * Returns a wiggleDataStream, free it with wiggleDataStreamFree() */
{
char splitTableOrFileName[256];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
struct wiggleDataStream *wds = NULL;
int operations = wigFetchRawStats;
char *table = curTable;

/* ct, isCustom and splitTableOrFileName are set here for custom tracks */
if (isCustomTrack(table))
    {
    ct = lookupCt(table);
    /* Guard against lookupCt() returning NULL so that a missing track
     * aborts with a message instead of crashing on ct->wiggle. */
    if (ct == NULL)
	errAbort("can not find custom wiggle track '%s'", table);
    isCustom = TRUE;
    if (! ct->wiggle)
	errAbort("called to work on a custom track '%s' that isn't wiggle data ?", table);

    safef(splitTableOrFileName, ArraySize(splitTableOrFileName), "%s",
	    ct->dbTrack ? ct->dbTableName : ct->wigFile);
    }

wds = wiggleDataStreamNew();

wds->setChromConstraint(wds, chrom);

if (isCustom)
    {
    /* database-backed custom tracks live in CUSTOM_TRASH, file-backed
     * ones are read with a NULL database */
    if (ct->dbTrack)
	wds->getData(wds, CUSTOM_TRASH, splitTableOrFileName, operations);
    else
	wds->getData(wds, NULL, splitTableOrFileName, operations);
    }
else
    {
    boolean hasBin = FALSE;
    if (hFindSplitTable(database, chrom, table, splitTableOrFileName, &hasBin))
	{
	wds->getData(wds, database, splitTableOrFileName, operations);
	}
    }
return wds;
}
Exemple #7
0
void chromFeatureSeq(struct sqlConnection *conn, 
	char *database, char *chrom, char *trackSpec,
	FILE *bedFile, FILE *faFile,
	int *retItemCount, int *retBaseCount)
/* Write out sequence file for features from one chromosome.
 * This separate routine handles the non-merged case.  Its
 * reason for being is so that the feature names get preserved.
 * Each feature is written as a bed line to bedFile and as a fasta record
 * to faFile; either file may be NULL to skip that output.
 * NOTE(review): retItemCount and retBaseCount are never written here -
 * confirm against callers whether that is intended. */
{
char trackPart[512];
char table[HDB_MAX_TABLE_STRING];
boolean hasBin;
struct featureBits *fbList = NULL, *fb;

if (trackSpec[0] == '!')
   errAbort("Sorry, '!' not available with fa output unless you use faMerge");
isolateTrackPartOfSpec(trackSpec, trackPart);
if (strchr(trackPart, '.') != NULL)
    errAbort("Sorry, only database (not file) tracks allowed with "
             "fa output unless you use faMerge");

/* The lookup result, table and hasBin are all deliberately unused. */
(void) hFindSplitTable(database, chrom, trackPart, table, &hasBin);

fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom),
			 where, TRUE, TRUE);
for (fb = fbList; fb != NULL; fb = fb->next)
    {
    if (bedFile != NULL)
	{
	fprintf(bedFile, "%s\t%d\t%d\t%s",
	    fb->chrom, fb->start, fb->end, fb->name);
	/* score and strand columns are only written for a known strand */
	if (fb->strand != '?')
	    fprintf(bedFile, "\t0\t%c", fb->strand);
	fprintf(bedFile, "\n");
	}

    if (faFile == NULL)
	continue;

    struct dnaSeq *seq = hDnaFromSeq(database, chrom, fb->start, fb->end, dnaLower);
    if (fb->strand == '-')
	reverseComplement(seq->dna, seq->size);
    faWriteNext(faFile, fb->name, seq->dna, seq->size);
    freeDnaSeq(&seq);
    }
featureBitsFreeList(&fbList);
}
Exemple #8
0
/* Load one or more genePreds called name from geneTable in db.
 * When the global onlyChrom is set the query is also restricted to that
 * chromosome.  The list is returned in the order the reader produced it.
 * Aborts when the table cannot be found or no genePred matches.
 * NOTE(review): name and onlyChrom are placed into the where clause with
 * plain safef(); confirm they never carry untrusted text. */
struct genePred *getPredsForName(char *name, char *geneTable, char *db)
{
struct sqlConnection *conn = hAllocConn(db);
char splitTable[HDB_MAX_TABLE_STRING];
boolean hasBin;

if (!hFindSplitTable(db, NULL, geneTable, splitTable, &hasBin))
    errAbort("can't find table %s\n", geneTable);

char extra[2048];
if (onlyChrom == NULL)
    safef(extra, sizeof extra, "name='%s'", name);
else
    safef(extra, sizeof extra, "name='%s' and chrom='%s'", name, onlyChrom);

struct genePredReader *reader = genePredReaderQuery( conn, splitTable, extra);
struct genePred *list = NULL;
struct genePred *gene;

/* Collect at the head, then reverse once at the end. */
for (gene = genePredReaderNext(reader); gene != NULL;
     gene = genePredReaderNext(reader))
    {
    verbose(2, "got gene %s\n",gene->name);
    slAddHead(&list, gene);
    }

if (list == NULL)
    errAbort("no genePred for gene %s in %s\n",name, geneTable);

slReverse(&list);

genePredReaderFree(&reader);
hFreeConn(&conn);

return list;
}
Exemple #9
0
static struct psl *loadAlign(struct sqlConnection *conn, struct mappingInfo *mi, int start)
/* load a psl that must exist */
{
char rootTable[256], table[256], query[256];
boolean hasBin;
struct sqlResult *sr;
char **row;
struct psl *psl;

if (mi->suffix == NULL)
    safef(rootTable, sizeof(rootTable), "%s%sAli", mi->tblPre, mi->geneSet);
else
    safef(rootTable, sizeof(rootTable), "%s%sAli%s", mi->tblPre, mi->geneSet,mi->suffix);
hFindSplitTable(database, seqName, rootTable, table, &hasBin);

sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tStart = %d",
      table, mi->pg->name, start);
sr = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);
psl = pslLoad(row+hasBin);
sqlFreeResult(&sr);
return psl;
}
Exemple #10
0
struct genePred *getCurGenePred(struct sqlConnection *conn)
/* Return current gene in genePred. */
{
char *track = genomeSetting("knownGene");
char table[64];
boolean hasBin;
char query[256];
struct sqlResult *sr;
char **row;
struct genePred *gp = NULL;
hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, track, table, &hasBin);
sqlSafef(query, sizeof(query),
	"select * from %s where name = '%s' "
	"and chrom = '%s' and txStart=%d and txEnd=%d"
	, table, curGeneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    gp = genePredLoad(row + hasBin);
sqlFreeResult(&sr);
if (gp == NULL)
    errAbort("getCurGenePred: Can't find %s", query);
return gp;
}
Exemple #11
0
struct chain *chainDbLoad(char *db, struct sqlConnection *conn, char *track,
			  char *chrom, int id)
/** Load chain.
 * Resolves the track's table for chrom, loads the chain header row with the
 * given id and then attaches its blocks with chainDbAddBlocks().
 * Aborts when the track table or the id is not found. */
{
char table[HDB_MAX_TABLE_STRING];
int rowOffset;
if (!hFindSplitTable(db, chrom, track, table, sizeof table, &rowOffset))
    errAbort("No %s track in database", track);

char query[256];
sqlSafef(query, sizeof(query), 
	 "select * from %s where id = %d", table, id);

struct sqlResult *sr = sqlGetResult(conn, query);
char **row = sqlNextRow(sr);
if (row == NULL)
    errAbort("Can't find %d in %s", id, table);

/* rowOffset skips whatever leading column hFindSplitTable() reported. */
struct chain *chain = chainHeadLoad(row + rowOffset);
sqlFreeResult(&sr);

chainDbAddBlocks(chain, track, conn);
return chain;
}
Exemple #12
0
static struct psl *loadPslRangeT(char *table, char *qName, char *tName, int tStart, int tEnd)
/* Load a list of psls given qName tName tStart tEnd */
{
struct sqlResult *sr = NULL;
char **row;
struct psl *psl = NULL, *pslList = NULL;
boolean hasBin;
char splitTable[64];
char query[256];
struct sqlConnection *conn = hAllocConn(database);

hFindSplitTable(database, seqName, table, splitTable, &hasBin);
sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tName = '%s' and tEnd > %d and tStart < %d", splitTable, qName, tName, tStart, tEnd);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    psl = pslLoad(row+hasBin);
    slAddHead(&pslList, psl);
    }
sqlFreeResult(&sr);
slReverse(&pslList);
hFreeConn(&conn);
return pslList;
}
Exemple #13
0
static int wigOutRegion(char *table, struct sqlConnection *conn,
	struct region *region, int maxOut, enum wigOutputType wigOutType,
	struct wigAsciiData **data, int spanConstraint)
/* Write out wig data in region.  Write up to maxOut elements.
 * Returns number of elements written.
 * wigOutType selects the result form: wigOutBed prints bed lines,
 * wigOutData (and any unlisted value) prints ascii data, and wigDataNoPrint
 * prints nothing but moves the fetched ascii list into *data when data is
 * non-NULL.  A non-zero spanConstraint is applied as the span constraint
 * except where a minimum span is computed with minSpan() instead. */
{
int linesOut = 0;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchAscii;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;

/* Pick the fetch operation that matches the requested output form. */
switch (wigOutType)
    {
    case wigOutBed:
	operations = wigFetchBed;
	break;
    default:
    case wigDataNoPrint:
    case wigOutData:
	operations = wigFetchAscii;
	break;
    };

WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

if (hasConstraint)
    freeMem(dataConstraint);	/* been cloned into wds */

wds->setMaxOutput(wds, maxOut);
wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);

/* table2, when set, supplies the bed list used for intersection. */
if (table2)
    intersectBedList = bedTable2(conn, region, table2);

if (isCustom)
    {
    if (ct->dbTrack)
	{
	/* Database-backed custom track: use the caller's span when given,
	 * otherwise look up the minimum span through a CUSTOM_TRASH
	 * connection. */
	if (spanConstraint)
	    wds->setSpanConstraint(wds,spanConstraint);
	else
	    {
	    struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	    unsigned span = minSpan(trashConn, splitTableOrFileName,
		region->chrom, region->start, region->end, cart, tdb);
	    wds->setSpanConstraint(wds, span);
	    hFreeConn(&trashConn);
	    }
	valuesMatched = getWigglePossibleIntersection(wds, region,
	    CUSTOM_TRASH, table2, &intersectBedList,
		splitTableOrFileName, operations);
	}
    else
	/* File-backed custom track: no database, no span constraint set. */
	valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
	    &intersectBedList, splitTableOrFileName, operations);
    }
else
    {
    /* Regular database table; nothing is fetched when it cannot be
     * resolved for this chromosome. */
    if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
	{
	/* XXX TBD, watch for a span limit coming in as an SQL filter */
	if (intersectBedList)
	    {
	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	    unsigned span;
	    span = minSpan(conn, splitTableOrFileName, region->chrom,
		region->start, region->end, cart, tdb);
	    wds->setSpanConstraint(wds, span);
	    }
	else if (spanConstraint)
	    wds->setSpanConstraint(wds,spanConstraint);

	valuesMatched = getWigglePossibleIntersection(wds, region, database,
	    table2, &intersectBedList, splitTableOrFileName, operations);
	}
    }

switch (wigOutType)
    {
    case wigDataNoPrint:
	if (data)
	    {
	    if (*data != NULL)	/* no exercise of this function yet	*/
		{	/*	data not null, add to existing list	*/
		struct wigAsciiData *asciiData;
		struct wigAsciiData *next;
		for (asciiData = *data; asciiData; asciiData = next)
		    {
		    next = asciiData->next;
		    slAddHead(&wds->ascii, asciiData);
		    }
		}
	    wds->sortResults(wds);
	    *data = wds->ascii;	/* moving the list to *data */
	    wds->ascii = NULL;	/* gone as far as wds is concerned */
	    }
	    /* runs whether or not data was given: the count reported for
	     * this mode is the number of values matched */
	    linesOut = valuesMatched;
	break;
    case wigOutBed:
	linesOut = wds->bedOut(wds, "stdout", TRUE);/* TRUE == sort output */
	break;
    default:
    case wigOutData:
	linesOut = wds->asciiOut(wds, database, "stdout", TRUE, FALSE);
	break;		/* TRUE == sort output, FALSE == not raw data out */
    };

wiggleDataStreamFree(&wds);

return linesOut;
}	/*	static int wigOutRegion()	*/
Exemple #14
0
void checkInputExists(struct sqlConnection *conn,char *database, 
	struct chromInfo *chromInfoList, int tableCount, char *tables[])
/* check input tables/files exist, especially to handle split tables
 * Each spec may start with '!', which is skipped.  A spec whose track part
 * contains '.' is treated as a file, anything else as a database table.
 * A spec passes when it exists as given or for at least one chromosome
 * accepted by inclChrom().  Every miss is warned about, and the routine
 * aborts at the end when anything was missing.
 * conn may be NULL; a connection is then opened on demand and released
 * before returning. */
{
char *track=NULL;
int i = 0, missing=0;
char t[512], *s=NULL;
char table[HDB_MAX_TABLE_STRING];
char fileName[512];
boolean found = FALSE;
boolean ownConn = FALSE;	/* TRUE when conn was allocated in here */

for (i=0; i<tableCount; ++i)
    {
    struct chromInfo *cInfo;

    track = tables[i];
    if (track[0] == '!')
	{
	++track;
	}
    isolateTrackPartOfSpec(track, t);
    s = strrchr(t, '.');
    if (s)
	{
	if (fileExists(t))
	    continue;
	}
    else
	{
	if (NULL == conn)
	    {
	    conn = hAllocConn(database);
	    ownConn = TRUE;
	    }
	if (sqlTableExists(conn, t))
	    continue;
	}

    /* Not present under its plain name: look for a per-chromosome
     * file or split table instead. */
    found = FALSE;
    for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
	{
	if (!inclChrom(cInfo->chrom))
	    continue;
	if (s)
	    {
	    chromFileName(t, cInfo->chrom, fileName);
	    if (fileExists(fileName))
		{
		found = TRUE;
		break;
		}
	    }
	else
	    {
	    boolean hasBin;
	    if (hFindSplitTable(database, cInfo->chrom, t, table, &hasBin))
		{
		found = TRUE;
		break;
		}
	    }
	}
    if (!found)
	{
	if (s)
	    warn("file %s not found for any chroms", t);
	else
	    warn("table %s not found for any chroms", t);
	++missing;	    
	}
    }

/* Release only what was opened here; a caller-supplied connection is
 * left alone.  Previously the on-demand connection was never freed. */
if (ownConn)
    hFreeConn(&conn);

if (missing>0)
    errAbort("Error: %d input table(s)/file(s) do not exist for any of the chroms specified",missing);
}
Exemple #15
0
void doSummaryStatsWiggle(struct sqlConnection *conn)
/* Put up page showing summary stats for wiggle track.
 * Works on the global curTable over the regions from getRegions().
 * With exactly one region a single stats table is printed after the loop;
 * with several regions one row is printed per region inside the loop and a
 * SUMMARY row is then built from the accumulators.  A second section with
 * region size, gap size and timing follows in both cases. */
{
// grab the right trackDb for the current table.  The curTrack variable
// has the composite trackDb in it
struct trackDb *track  = hTrackDbForTrack(database, curTable);

char *table = curTable;
struct region *region, *regionList = getRegions();
char *regionName = getRegionName();
long long regionSize = 0;
long long gapTotal = 0;
long startTime = 0, wigFetchTime = 0;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int regionCount = 0;
int regionsDone = 0;
unsigned span = 0;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
boolean hasConstraint = FALSE;
char *table2 = NULL;
boolean fullGenome = FALSE;
boolean statsHeaderDone = FALSE;
boolean gotSome = FALSE;
char *shortLabel = table;
long long statsItemCount = 0;	/*	global accumulators for overall */
int statsSpan = 0;		/*	stats summary on a multiple region */
double statsSumData = 0.0;	/*	output */
double statsSumSquares = 0.0;		/*	"  "	*/
double lowerLimit = INFINITY;		/*	"  "	*/
double upperLimit = -1.0 * INFINITY;	/*	"  "	*/

startTime = clock1000();
/* fall back to the table name as label when no trackDb was found */
if (track != NULL)
     shortLabel = track->shortLabel;

/*	Count the regions, when only one, we can do more stats */
for (region = regionList; region != NULL; region = region->next)
    ++regionCount;

htmlOpen("%s (%s) Wiggle Summary Statistics", shortLabel, table);

if (anySubtrackMerge(database, curTable))
    hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this "
	    "page (not implemented yet).  Statistics shown here are only for "
	    "the primary table %s (%s).</EM>", shortLabel, table);

fullGenome = fullGenomeRegion();

WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

for (region = regionList; region != NULL; region = region->next)
    {
    struct bed *intersectBedList = NULL;
    int operations;

    ++regionsDone;

    /* table2, when set, supplies the bed list used for intersection */
    if (table2)
	intersectBedList = bedTable2(conn, region, table2);

    operations = wigFetchStats;
#if defined(NOT)
    /*	can't do the histogram now, that operation times out	*/
    if (1 == regionCount)
	operations |= wigFetchAscii;
#endif

    wds->setChromConstraint(wds, region->chrom);

    /* for a whole-genome request the position constraint is set to 0,0
     * rather than to the region's own start and end */
    if (fullGenome)
	wds->setPositionConstraint(wds, 0, 0);
    else
	wds->setPositionConstraint(wds, region->start, region->end);

    if (hasConstraint)
	wds->setDataConstraint(wds, dataConstraint, ll, ul);

    /* depending on what is coming in on regionList, we may need to be
     * smart about how often we call getData for these custom tracks
     * since that is potentially a large file read each time.
     */
    if (isCustom)
	{
	if (ct->dbTrack)
	    {
	    struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	    span = minSpan(trashConn, splitTableOrFileName, region->chrom,
		region->start, region->end, cart, tdb);
	    wds->setSpanConstraint(wds, span);
	    valuesMatched = getWigglePossibleIntersection(wds, region,
		CUSTOM_TRASH, table2, &intersectBedList,
		    splitTableOrFileName, operations);
	    hFreeConn(&trashConn);
	    }
	else
	    {
	    valuesMatched = getWigglePossibleIntersection(wds, region, NULL,
		table2, &intersectBedList, splitTableOrFileName, operations);

	/*  XXX We need to properly get the smallest span for custom tracks */
	    /*	This is not necessarily the correct answer here	*/
	    if (wds->stats)
		span = wds->stats->span;
	    else
		span = 1;
	    }
	}
    else
	{
	/* regular database table; nothing is fetched for this region when
	 * the table cannot be resolved for its chromosome */
	if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
	    {
	    span = minSpan(conn, splitTableOrFileName, region->chrom,
		region->start, region->end, cart, track);
	    wds->setSpanConstraint(wds, span);
	    valuesMatched = getWigglePossibleIntersection(wds, region,
		database, table2, &intersectBedList, splitTableOrFileName,
		    operations);
	    if (intersectBedList)
		span = 1;
	    }
	}
    /*	when doing multiple regions, we need to print out each result as
     *	it happens to keep the connection open to the browser and
     *	prevent any timeout since this could take a while.
     *	(worst case test is quality track on panTro1)
     */
    if (wds->stats)
	statsItemCount += wds->stats->count;
    if (wds->stats && (regionCount > 1) && (valuesMatched > 0))
	{
	/* rebuild this region's sum and sum of squares from its mean,
	 * variance and count so they can be added across regions */
	double sumData = wds->stats->mean * wds->stats->count;
	double sumSquares;

	if (wds->stats->count > 1)
	    sumSquares = (wds->stats->variance * (wds->stats->count - 1)) +
		((sumData * sumData)/wds->stats->count);
	else
	    sumSquares = sumData * sumData;

	/*	global accumulators for overall summary	*/
	statsSpan = wds->stats->span;
	statsSumData += sumData;
	statsSumSquares += sumSquares;
	if (wds->stats->lowerLimit < lowerLimit)
	    lowerLimit = wds->stats->lowerLimit;
	if ((wds->stats->lowerLimit + wds->stats->dataRange) > upperLimit)
	    upperLimit = wds->stats->lowerLimit + wds->stats->dataRange;

	/* the two calls differ only in the sixth argument, which is TRUE
	 * for the first region printed and FALSE afterwards */
	if (statsHeaderDone)
	    wds->statsOut(wds, database, "stdout", TRUE, TRUE, FALSE, TRUE);
	else
	    {
	    wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, TRUE);
	    statsHeaderDone = TRUE;
	    }
	wds->freeStats(wds);
	gotSome = TRUE;
	}
    if ((regionCount > MAX_REGION_DISPLAY) &&
		(regionsDone >= MAX_REGION_DISPLAY))
	{
	hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> Can not display more "
	    "than %d regions, <BR> would take too much time </TH></TR>\n",
		MAX_REGION_DISPLAY);
	break;	/*	exit this for loop	*/
	}
    }	/*for (region = regionList; region != NULL; region = region->next) */

if (hasConstraint)
    freeMem(dataConstraint);	/* been cloned into wds */

if (1 == regionCount)
    {
    statsPreamble(wds, regionList->chrom, regionList->start, regionList->end,
	span, valuesMatched, table2);
    /* 3 X TRUE = sort results, html table output, with header,
     *	the FALSE means close the table after printing, no more rows to
     *	come.  The case in the if() statement was already taken care of
     *	in the statsPreamble() printout.  No need to do that again.
     */

    if ( ! ((valuesMatched == 0) && table2) )
	wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, FALSE);
    regionSize = basesInRegion(regionList,0);
    gapTotal = gapsInRegion(conn, regionList,0);
    }
else
    {	/* this is a bit of a kludge here since these printouts are done in the
	 *	library source wigDataStream.c statsOut() function and
	 *	this is a clean up of that.  That function should be
	 *	pulled out of there and made independent and more
	 *	versatile.
	 */
    long long realSize;
    double variance;
    double stddev;

    /*	Too expensive to lookup the numbers for thousands of regions */
    regionSize = basesInRegion(regionList,MAX_REGION_DISPLAY);
    gapTotal = gapsInRegion(conn, regionList,MAX_REGION_DISPLAY);
    realSize = regionSize - gapTotal;

    /*	close the table which was left open in the loop above	*/
    if (!gotSome)
	hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> No data found matching this request </TH></TR>\n");

    hPrintf("<TR><TH ALIGN=LEFT> SUMMARY: </TH>\n");
    hPrintf("\t<TD> &nbsp; </TD>\n");	/*	chromStart	*/
    hPrintf("\t<TD> &nbsp; </TD>\n");	/*	chromEnd	*/
    hPrintf("\t<TD ALIGN=RIGHT> ");
    printLongWithCommas(stdout, statsItemCount);
    hPrintf(" </TD>\n" );
    hPrintf("\t<TD ALIGN=RIGHT> %d </TD>\n", statsSpan);
    hPrintf("\t<TD ALIGN=RIGHT> ");
    printLongWithCommas(stdout, statsItemCount*statsSpan);
    /* NOTE(review): realSize is not checked for zero before this division */
    hPrintf("&nbsp;(%.2f%%) </TD>\n",
	100.0*(double)(statsItemCount*statsSpan)/(double)realSize);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", lowerLimit);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit - lowerLimit);
    if (statsItemCount > 0)
	hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", statsSumData/statsItemCount);
    else
	hPrintf("\t<TD ALIGN=RIGHT> 0.0 </TD>\n");
    stddev = 0.0;
    variance = 0.0;
    /* overall sample variance from the accumulated sum and sum of squares */
    if (statsItemCount > 1)
	{
	variance = (statsSumSquares -
	    ((statsSumData * statsSumData)/(double) statsItemCount)) /
		(double) (statsItemCount - 1);
	if (variance > 0.0)
	    stddev = sqrt(variance);
	}
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", variance);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", stddev);
    hPrintf("</TR>\n");
    wigStatsTableHeading(stdout, TRUE);
    hPrintf("</TABLE></TD></TR></TABLE></P>\n");
    }


#if defined(NOT)
/*	can't do the histogram now, that operation times out	*/
/*	Single region, we can do the histogram	*/
if ((valuesMatched > 1) && (1 == regionCount))
    {
    float *valuesArray = NULL;
    size_t valueCount = 0;
    struct histoResult *histoGramResult;

    /*	convert the ascii data listings to one giant float array 	*/
    valuesArray = wds->asciiToDataArray(wds, valuesMatched, &valueCount);

    /*	histoGram() may return NULL if it doesn't work	*/

    histoGramResult = histoGram(valuesArray, valueCount,
	    NAN, (unsigned) 0, NAN, (float) wds->stats->lowerLimit,
		(float) (wds->stats->lowerLimit + wds->stats->dataRange),
		(struct histoResult *)NULL);

    printHistoGram(histoGramResult, TRUE);	/* TRUE == html output */

    freeHistoGram(&histoGramResult);
    wds->freeAscii(wds);
    wds->freeArray(wds);
    }
#endif

wds->freeStats(wds);
wiggleDataStreamFree(&wds);

/* second section: region size, gap size, elapsed time and filter settings */
wigFetchTime = clock1000() - startTime;
webNewSection("Region and Timing Statistics");
hTableStart();
stringStatRow("region", regionName);
numberStatRow("bases in region", regionSize);
numberStatRow("bases in gaps", gapTotal);
floatStatRow("load and calc time", 0.001*wigFetchTime);
wigFilterStatRow(conn);
stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
hTableEnd();
htmlClose();
}	/*	void doSummaryStatsWiggle(struct sqlConnection *conn)	*/
Exemple #16
0
struct bed *getWiggleAsBed(
    char *db, char *table, 	/* Database and table. */
    struct region *region,	/* Region to get data for. */
    char *filter, 		/* Filter to add to SQL where clause if any. */
    struct hash *idHash, 	/* Restrict to id's in this hash if non-NULL. */
    struct lm *lm,		/* Where to allocate memory. */
    struct sqlConnection *conn)	/* SQL connection to work with */
/* Return a bed list of all items in the given range in table.
 * Cleanup result via lmCleanup(&lm) rather than bedFreeList.
 * The beds are fetched with the wigFetchBed operation, sorted, and cloned
 * into lm; at most bigFileMaxOutput() items are requested.  Returns NULL
 * when nothing matched or a database table could not be resolved.
 * conn must be non-NULL for a database table (aborts otherwise). */
/* filter and idHash are currently unused, perhaps future use	*/
{
struct bed *bedList=NULL;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchBed;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;
int maxOut;

WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

if (hasConstraint)
    freeMem(dataConstraint);	/* been cloned into wds */

maxOut = bigFileMaxOutput();

wds->setMaxOutput(wds, maxOut);

wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);

/* table2, when set, supplies the bed list used for intersection. */
if (table2)
    intersectBedList = bedTable2(conn, region, table2);

if (isCustom)
    {
    if (ct->dbTrack)
	{
	unsigned span = 0;
	struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
	struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);

	/* Set the span constraint BEFORE fetching, as the database-table
	 * branch below does.  It used to be set after the fetch, where it
	 * could no longer influence the data that was read. */
	span = minSpan(trashConn, splitTableOrFileName, region->chrom,
	    region->start, region->end, cart, tdb);
	wds->setSpanConstraint(wds, span);

	valuesMatched = getWigglePossibleIntersection(wds, region,
	    CUSTOM_TRASH, table2, &intersectBedList,
		splitTableOrFileName, operations);
	hFreeConn(&trashConn);
	}
    else
	/* File-backed custom track: no database and no span constraint. */
	valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
	    &intersectBedList, splitTableOrFileName, operations);
    }
else
    {
    if (conn == NULL)
	errAbort( "getWiggleAsBed: NULL conn given for database table");

    if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
	{
	struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	unsigned span = 0;

	/* XXX TBD, watch for a span limit coming in as an SQL filter */
	span = minSpan(conn, splitTableOrFileName, region->chrom,
	    region->start, region->end, cart, tdb);
	wds->setSpanConstraint(wds, span);

	valuesMatched = getWigglePossibleIntersection(wds, region, database,
	    table2, &intersectBedList, splitTableOrFileName, operations);
	}
    }

if (valuesMatched > 0)
    {
    struct bed *bed;

    /* Copy into lm so the result outlives the data stream freed below. */
    wds->sortResults(wds);
    for (bed = wds->bed; bed != NULL; bed = bed->next)
	{
	struct bed *copy = lmCloneBed(bed, lm);
	slAddHead(&bedList, copy);
	}
    slReverse(&bedList);
    }

wiggleDataStreamFree(&wds);

return bedList;
}	/*	struct bed *getWiggleAsBed()	*/
Exemple #17
0
void genericWiggleClick(struct sqlConnection *conn, struct trackDb *tdb, 
	char *item, int start)
/* Display details for Wiggle data tracks.
 *	conn may be NULL for custom tracks when from file */
{
char *chrom = cartString(cart, "c");
char table[64];
boolean hasBin;
unsigned span = 0;
struct wiggleDataStream *wds = wiggleDataStreamNew();
unsigned long long valuesMatched = 0;
struct histoResult *histoGramResult;
float *valuesArray = NULL;
size_t valueCount = 0;
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
int operations = wigFetchStats;	/*	default operation */

if (startsWith("ct_", tdb->table))
    {
    ct = lookupCt(tdb->table);
    if (!ct)
        {
        warn("<P>wiggleClick: can not find custom wiggle track '%s'</P>", tdb->table);
        return;
        }
    if (! ct->wiggle)
        {
        warn("<P>wiggleClick: called to do stats on a custom track that isn't wiggle data ?</P>");
        return;
        }
    if (ct->dbTrack)
	{
	safef(table,ArraySize(table), "%s", ct->dbTableName);
	span = minSpan(conn, table, chrom, winStart, winEnd, cart, tdb);
	}
    else
	{
	safef(table,ArraySize(table), "%s", ct->wigFile);
	span = 0;	/*	cause all spans to be examined	*/
	}
    isCustom = TRUE;
    }
else
    {
    hFindSplitTable(database, seqName, tdb->table, table, &hasBin);
    /*span = spanInUse(conn, table, chrom, winStart, winEnd, cart);*/
    span = minSpan(conn, table, chrom, winStart, winEnd, cart, tdb);
    }

/*	if for some reason we don't have a chrom and win positions, this
 *	should be run in a loop that does one chrom at a time.  In the
 *	case of hgc, there seems to be a chrom and a position.
 */
wds->setSpanConstraint(wds, span);
wds->setChromConstraint(wds, chrom);
wds->setPositionConstraint(wds, winStart, winEnd);

/*	If our window is less than some number of points, we can do
 *	the histogram too.
 */
#define MAX_WINDOW_ALLOW_STATS	100000001
#define MAX_WINDOW_ALLOW_STRING	"100,000,000"
if ((winEnd - winStart) < MAX_WINDOW_ALLOW_STATS)
	operations |= wigFetchAscii;

/*	We want to also fetch the actual data values so we can run a
 *	histogram function on them.  You can't fetch the data in the
 *	form of the data array since the span information is then lost.
 *	We have to do the ascii data list format, and prepare that to
 *	send to the histogram function.
 */

if (isCustom)
    {
    if (ct->dbTrack)
	valuesMatched = wds->getData(wds, CUSTOM_TRASH, table, operations);
    else
	valuesMatched = wds->getData(wds, (char *)NULL, table, operations);
    }
else
    valuesMatched = wds->getData(wds, database, table, operations);

statsPreamble(wds, chrom, winStart, winEnd, span, valuesMatched, NULL);

/*	output statistics table
 *		(+sort, +html output, +with header, +close table)
 */
wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, FALSE);

if ((winEnd - winStart) < MAX_WINDOW_ALLOW_STATS)
    {
    char *words[16];
    int wordCount = 0;
    char *dupe = cloneString(tdb->type);
    double minY, maxY, tDbMinY, tDbMaxY;
    float hMin, hMax, hRange;

    wordCount = chopLine(dupe, words);

    wigFetchMinMaxY(tdb, &minY, &maxY, &tDbMinY, &tDbMaxY, wordCount, words);
    hMin = min(minY,tDbMinY);
    hMax = max(maxY,tDbMaxY);
    hRange = hMax - hMin;

    /*	convert the ascii data listings to one giant float array 	*/
    valuesArray = wds->asciiToDataArray(wds, valuesMatched, &valueCount);

    /* let's see if we really want to use the range from the track type
     *	line, or the actual range in this data.  If there is a good
     *	actual range in the data, use that instead
     */
    if (hRange > 0.0) 
    	{
	if (wds->stats->dataRange != 0)
	    hRange = 0.0;
	}

    /*	If we have a valid range, use a specified 20 bin histogram
     *	NOTE: pass 21 as binCount to get a 20 bin histogram
     */
    if (hRange > 0.0)
	histoGramResult = histoGram(valuesArray, valueCount, (hRange/20.0),
	    (unsigned) 21, hMin, hMin, hMax, (struct histoResult *)NULL);
    else
	histoGramResult = histoGram(valuesArray, valueCount,
	    NAN, (unsigned) 0, NAN, (float) wds->stats->lowerLimit,
		(float) (wds->stats->lowerLimit + wds->stats->dataRange),
		    (struct histoResult *)NULL);

    /*	histoGram() may return NULL if it doesn't work, that's OK, the
     *	print out will indicate no results  (TRUE == html output)
     */
    printHistoGram(histoGramResult, TRUE);

    freeHistoGram(&histoGramResult);
    freeMem(valuesArray);
    }
else
    {
    printf("<P>(viewing windows of fewer than %s bases will also"
	" display a histogram)</P>\n", MAX_WINDOW_ALLOW_STRING);
    }

wiggleDataStreamFree(&wds);
}