static void altSplicePrint(struct section *section, struct sqlConnection *conn, char *geneId) /* Print out altSplicing info. */ { char *altId = section->items; char query[256]; struct sqlResult *sr; char **row; struct altGraphX *ag; char table[64]; boolean hasBin; hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, "altGraphX", table, &hasBin); sqlSafef(query, sizeof(query), "select * from %s where name='%s'", table, altId); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { ag = altGraphXLoad(row+hasBin); hPrintf("<TABLE><TR><TD BGCOLOR='#888888'>\n"); altGraphXMakeImage(ag); hPrintf("</TD></TR></TABLE><BR>"); } sqlFreeResult(&sr); hPrintf("This graph shows alternative splicing observed in mRNAs and " "ESTs that is either conserved in mouse, present in full length " "mRNAs, or observed at least three times in ESTs."); }
static struct slName *getExamples(char *db, struct sqlConnection *conn, char *table, char *field, int count) /* Return a list of several example values of table.field. */ { boolean isTabix = FALSE; if (isBamTable(table)) { assert(sameString(field, "qName")); return randomBamIds(table, conn, count); } else if (isBigBed(db, table, curTrack, ctLookupName)) { assert(sameString(field, "name")); return randomBigBedIds(table, conn, count); } else if (isVcfTable(table, &isTabix)) { assert(sameString(field, "id")); return randomVcfIds(table, conn, count, isTabix); } else { char fullTable[HDB_MAX_TABLE_STRING]; char *c = strchr(table, '.'); if (c || ! hFindSplitTable(database, NULL, table, fullTable, NULL)) safecpy(fullTable, sizeof(fullTable), table); return sqlRandomSampleConn(conn, fullTable, field, count); } }
void orTable(char *database, Bits *acc, char *track, char *chrom,
        int chromSize, struct sqlConnection *conn)
/* Or in table if it exists. Else do nothing.
 * When the track part of the spec contains a '.', it is handed to orFile()
 * instead of being looked up as a database table. */
{
char trackPart[512];
char table[HDB_MAX_TABLE_STRING];
boolean hasBin;
int minFeatureSize;
boolean isSplit;
boolean isFound;

isolateTrackPartOfSpec(track, trackPart);
if (strrchr(trackPart, '.') != NULL)
    {
    /* Looks like a file name rather than a table. */
    orFile(acc, track, chrom, chromSize);
    return;
    }

minFeatureSize = optionInt("minFeatureSize", 0);
/* NOTE(review): table is passed to hTableExists() regardless of what
 * hFindSplitTable() returned - confirm table is always filled in. */
isSplit = hFindSplitTable(database, chrom, trackPart, table, &hasBin);
isFound = hTableExists(database, table);
verbose(3,"orTable: db: %s isFound: %s isSplit: %s %s %s %s\n", database,
        isFound ? "TRUE" : "FALSE",
        isSplit ? "TRUE" : "FALSE", chrom, trackPart, table );
if (isFound)
    fbOrTableBitsQueryMinSize(database, acc, track, chrom, chromSize, conn,
            where, TRUE, TRUE, minFeatureSize);
}
static struct genePred *getCurGenePred(struct sqlConnection *conn) /* Return current gene in genePred. */ { char *track = genomeSetting("knownGene"); char table[HDB_MAX_TABLE_STRING]; boolean hasBin; char query[256]; struct sqlResult *sr; char **row; struct genePred *gp = NULL; if (!hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, track, table, sizeof table, &hasBin)) errAbort("track %s not found", track); bool hasAttrId = sqlColumnExists(conn, table, "alignId"); sqlSafef(query, sizeof(query), "select * from %s where name = '%s' " "and chrom = '%s' and txStart=%d and txEnd=%d" , table, curGeneId, curGeneChrom, curGeneStart, curGeneEnd); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { gp = genePredLoad(row + hasBin); #define ALIGNIDFIELD 11 // Gencode Id if (hasAttrId) curAlignId = cloneString(row[ALIGNIDFIELD]); else curAlignId = gp->name; } sqlFreeResult(&sr); if (gp == NULL) errAbort("getCurGenePred: Can't find %s", query); return gp; }
void doFlyreg(struct trackDb *tdb, char *item) /* flyreg.org: Drosophila DNase I Footprint db. */ { struct dyString *query = newDyString(256); struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr = NULL; char **row; int start = cartInt(cart, "o"); int end = cartInt(cart, "t"); char fullTable[HDB_MAX_TABLE_STRING]; boolean hasBin = FALSE; char *motifTable = "flyregMotif"; struct dnaMotif *motif = NULL; boolean isVersion2 = sameString(tdb->table, "flyreg2"); genericHeader(tdb, item); if (!hFindSplitTable(database, seqName, tdb->table, fullTable, sizeof fullTable, &hasBin)) errAbort("track %s not found", tdb->table); sqlDyStringPrintf(query, "select * from %s where chrom = '%s' and ", fullTable, seqName); hAddBinToQuery(start, end, query); sqlDyStringPrintf(query, "chromStart = %d and name = '%s'", start, item); sr = sqlGetResult(conn, query->string); if ((row = sqlNextRow(sr)) != NULL) { struct flyreg2 fr; if (isVersion2) flyreg2StaticLoad(row+hasBin, &fr); else flyregStaticLoad(row+hasBin, (struct flyreg *)(&fr)); printf("<B>Factor:</B> %s<BR>\n", fr.name); printf("<B>Target:</B> %s<BR>\n", fr.target); if (isVersion2) printf("<B>Footprint ID:</B> %06d<BR>\n", fr.fpid); printf("<B>PubMed ID:</B> <A HREF=\""); printEntrezPubMedUidUrl(stdout, fr.pmid); printf("\" TARGET=_BLANK>%d</A><BR>\n", fr.pmid); bedPrintPos((struct bed *)(&fr), 3, tdb); if (hTableExists(database, motifTable)) { motif = loadDnaMotif(item, motifTable); if (motif != NULL) motifHitSection(NULL, motif); } } else errAbort("query returned no results: \"%s\"", query->string); dyStringFree(&query); sqlFreeResult(&sr); hFreeConn(&conn); if (motif != NULL) webNewSection("%s",tdb->longLabel); printTrackHtml(tdb); }
struct wiggleDataStream *wigChromRawStats(char *chrom) /* Fetch stats for wig data in chrom. * Returns a wiggleDataStream, free it with wiggleDataStreamFree() */ { char splitTableOrFileName[256]; struct customTrack *ct = NULL; boolean isCustom = FALSE; struct wiggleDataStream *wds = NULL; int operations = wigFetchRawStats; char *table = curTable; /* ct, isCustom, wds are set here */ if (isCustomTrack(table)) { ct = lookupCt(table); isCustom = TRUE; if (! ct->wiggle) errAbort("called to work on a custom track '%s' that isn't wiggle data ?", table); if (ct->dbTrack) safef(splitTableOrFileName,ArraySize(splitTableOrFileName), "%s", ct->dbTableName); else safef(splitTableOrFileName,ArraySize(splitTableOrFileName), "%s", ct->wigFile); } wds = wiggleDataStreamNew(); wds->setChromConstraint(wds, chrom); if (isCustom) { if (ct->dbTrack) wds->getData(wds, CUSTOM_TRASH, splitTableOrFileName, operations); else wds->getData(wds, NULL, splitTableOrFileName, operations); } else { boolean hasBin = FALSE; if (hFindSplitTable(database, chrom, table, splitTableOrFileName, &hasBin)) { wds->getData(wds, database, splitTableOrFileName, operations); } } return wds; }
void chromFeatureSeq(struct sqlConnection *conn, char *database, char *chrom, char *trackSpec, FILE *bedFile, FILE *faFile, int *retItemCount, int *retBaseCount) /* Write out sequence file for features from one chromosome. * This separate routine handles the non-merged case. It's * reason for being is so that the feature names get preserved. */ { boolean hasBin; char t[512], *s = NULL; char table[HDB_MAX_TABLE_STRING]; struct featureBits *fbList = NULL, *fb; if (trackSpec[0] == '!') errAbort("Sorry, '!' not available with fa output unless you use faMerge"); isolateTrackPartOfSpec(trackSpec, t); s = strchr(t, '.'); if (s != NULL) errAbort("Sorry, only database (not file) tracks allowed with " "fa output unless you use faMerge"); // ignore isSplit return from hFindSplitTable() (void) hFindSplitTable(database, chrom, t, table, &hasBin); fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom), where, TRUE, TRUE); for (fb = fbList; fb != NULL; fb = fb->next) { int s = fb->start, e = fb->end; if (bedFile != NULL) { fprintf(bedFile, "%s\t%d\t%d\t%s", fb->chrom, fb->start, fb->end, fb->name); if (fb->strand != '?') fprintf(bedFile, "\t0\t%c", fb->strand); fprintf(bedFile, "\n"); } if (faFile != NULL) { struct dnaSeq *seq = hDnaFromSeq(database, chrom, s, e, dnaLower); if (fb->strand == '-') reverseComplement(seq->dna, seq->size); faWriteNext(faFile, fb->name, seq->dna, seq->size); freeDnaSeq(&seq); } } featureBitsFreeList(&fbList); }
/* load one or more genePreds from the database */ struct genePred *getPredsForName(char *name, char *geneTable, char *db) { struct sqlConnection *conn = hAllocConn(db); struct genePred *list = NULL; char splitTable[HDB_MAX_TABLE_STRING]; struct genePred *gene; boolean hasBin; struct genePredReader *reader; boolean found = hFindSplitTable(db, NULL, geneTable, splitTable, &hasBin); if (!found) errAbort("can't find table %s\n", geneTable); char extra[2048]; if (onlyChrom != NULL) safef(extra, sizeof extra, "name='%s' and chrom='%s'", name, onlyChrom); else safef(extra, sizeof extra, "name='%s'", name); reader = genePredReaderQuery( conn, splitTable, extra); while ((gene = genePredReaderNext(reader)) != NULL) { verbose(2, "got gene %s\n",gene->name); slAddHead(&list, gene); } if (list == NULL) errAbort("no genePred for gene %s in %s\n",name, geneTable); slReverse(&list); genePredReaderFree(&reader); hFreeConn(&conn); return list; }
static struct psl *loadAlign(struct sqlConnection *conn, struct mappingInfo *mi, int start) /* load a psl that must exist */ { char rootTable[256], table[256], query[256]; boolean hasBin; struct sqlResult *sr; char **row; struct psl *psl; if (mi->suffix == NULL) safef(rootTable, sizeof(rootTable), "%s%sAli", mi->tblPre, mi->geneSet); else safef(rootTable, sizeof(rootTable), "%s%sAli%s", mi->tblPre, mi->geneSet,mi->suffix); hFindSplitTable(database, seqName, rootTable, table, &hasBin); sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tStart = %d", table, mi->pg->name, start); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); psl = pslLoad(row+hasBin); sqlFreeResult(&sr); return psl; }
struct genePred *getCurGenePred(struct sqlConnection *conn) /* Return current gene in genePred. */ { char *track = genomeSetting("knownGene"); char table[64]; boolean hasBin; char query[256]; struct sqlResult *sr; char **row; struct genePred *gp = NULL; hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, track, table, &hasBin); sqlSafef(query, sizeof(query), "select * from %s where name = '%s' " "and chrom = '%s' and txStart=%d and txEnd=%d" , table, curGeneId, curGeneChrom, curGeneStart, curGeneEnd); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) gp = genePredLoad(row + hasBin); sqlFreeResult(&sr); if (gp == NULL) errAbort("getCurGenePred: Can't find %s", query); return gp; }
struct chain *chainDbLoad(char *db, struct sqlConnection *conn, char *track,
        char *chrom, int id)
/** Load chain.
 * Reads the header row with the given id from the track's table, then
 * attaches the chain's blocks through chainDbAddBlocks().
 * Aborts when the track table or the chain id cannot be found. */
{
char chainTable[HDB_MAX_TABLE_STRING];
char sql[256];
int binOffset;
struct sqlResult *sr;
char **row;
struct chain *loaded = NULL;

if (!hFindSplitTable(db, chrom, track, chainTable, sizeof chainTable, &binOffset))
    errAbort("No %s track in database", track);

sqlSafef(sql, sizeof(sql),
        "select * from %s where id = %d", chainTable, id);
sr = sqlGetResult(conn, sql);
row = sqlNextRow(sr);
if (row == NULL)
    errAbort("Can't find %d in %s", id, chainTable);

/* binOffset skips the leading bin column when the table has one. */
loaded = chainHeadLoad(row + binOffset);
sqlFreeResult(&sr);
chainDbAddBlocks(loaded, track, conn);
return loaded;
}
static struct psl *loadPslRangeT(char *table, char *qName, char *tName, int tStart, int tEnd)
/* Load a list of psls given qName tName tStart tEnd.
 * Selects rows of table whose qName and tName match and whose target range
 * overlaps tStart..tEnd (tEnd > tStart-arg and tStart < tEnd-arg).
 * Returns the list in query order, or NULL when nothing matches or the table
 * cannot be located.  Uses its own connection to the global database and
 * releases it before returning. */
{
struct sqlResult *sr = NULL;
char **row;
struct psl *psl = NULL, *pslList = NULL;
boolean hasBin;
char splitTable[64];
char query[256];
struct sqlConnection *conn = hAllocConn(database);

/* Only query when the table was located; otherwise splitTable and hasBin
 * would be used without having been filled in.  Previously the return value
 * was ignored. */
if (hFindSplitTable(database, seqName, table, splitTable, &hasBin))
    {
    sqlSafef(query, sizeof(query),
            "select * from %s where qName = '%s' and tName = '%s' and tEnd > %d and tStart < %d",
            splitTable, qName, tName, tStart, tEnd);
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
        psl = pslLoad(row+hasBin);
        slAddHead(&pslList, psl);
        }
    sqlFreeResult(&sr);
    /* Items were prepended above; restore query order. */
    slReverse(&pslList);
    }
hFreeConn(&conn);
return pslList;
}
static int wigOutRegion(char *table, struct sqlConnection *conn,
        struct region *region, int maxOut, enum wigOutputType wigOutType,
        struct wigAsciiData **data, int spanConstraint)
/* Write out wig data in region.  Write up to maxOut elements.
 * Returns number of elements written.
 *   table          - table (or custom track) to read
 *   conn           - connection used for the intersection table and, for
 *                    ordinary tables, the span lookup
 *   region         - chrom/start/end to restrict the fetch to
 *   wigOutType     - wigOutBed prints bed lines, wigOutData (and any other
 *                    value) prints ascii data, wigDataNoPrint prints nothing
 *   data           - for wigDataNoPrint only: when non-NULL, receives the
 *                    fetched ascii list, merged with any list already there
 *   spanConstraint - when nonzero, the span to restrict the fetch to (except
 *                    for ordinary tables with an intersection, see below) */
{
int linesOut = 0;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchAscii;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;

/* Pick the fetch operation that matches the requested output form. */
switch (wigOutType)
    {
    case wigOutBed:
        operations = wigFetchBed;
        break;
    default:
    case wigDataNoPrint:
    case wigOutData:
        operations = wigFetchAscii;
        break;
    };

/* NOTE(review): WIG_INIT is a macro defined elsewhere that relies on the
 * exact local variable names declared above; do not rename them. */
WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

if (hasConstraint)
    freeMem(dataConstraint);    /* been cloned into wds */

wds->setMaxOutput(wds, maxOut);
wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);

if (table2)
    intersectBedList = bedTable2(conn, region, table2);

if (isCustom)
    {
    if (ct->dbTrack)
        {
        /* Database-backed custom track: use the caller's span when given,
         * otherwise look up the minimum span in the custom trash database. */
        if (spanConstraint)
            wds->setSpanConstraint(wds,spanConstraint);
        else
            {
            struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
            struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
            unsigned span = minSpan(trashConn, splitTableOrFileName,
                region->chrom, region->start, region->end, cart, tdb);
            wds->setSpanConstraint(wds, span);
            hFreeConn(&trashConn);
            }
        valuesMatched = getWigglePossibleIntersection(wds, region,
            CUSTOM_TRASH, table2, &intersectBedList,
                splitTableOrFileName, operations);
        }
    else
        /* File-backed custom track: no database, no span constraint set. */
        valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
            &intersectBedList, splitTableOrFileName, operations);
    }
else
    {
    /* Ordinary table: nothing is fetched when the table cannot be found. */
    if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
        {
        /* XXX TBD, watch for a span limit coming in as an SQL filter */
        /* With an intersection the minimum span is used and spanConstraint
         * is ignored; without one, spanConstraint applies when nonzero. */
        if (intersectBedList)
            {
            struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
            unsigned span;
            span = minSpan(conn, splitTableOrFileName, region->chrom,
                region->start, region->end, cart, tdb);
            wds->setSpanConstraint(wds, span);
            }
        else if (spanConstraint)
            wds->setSpanConstraint(wds,spanConstraint);

        valuesMatched = getWigglePossibleIntersection(wds, region, database,
            table2, &intersectBedList, splitTableOrFileName, operations);
        }
    }

switch (wigOutType)
    {
    case wigDataNoPrint:
        if (data)
            {
            if (*data != NULL)  /* no exercise of this function yet */
                {       /* data not null, add to existing list */
                struct wigAsciiData *asciiData;
                struct wigAsciiData *next;
                /* next is saved first because slAddHead rewrites
                 * asciiData->next. */
                for (asciiData = *data; asciiData; asciiData = next)
                    {
                    next = asciiData->next;
                    slAddHead(&wds->ascii, asciiData);
                    }
                }
            wds->sortResults(wds);
            *data = wds->ascii; /* moving the list to *data */
            wds->ascii = NULL;  /* gone as far as wds is concerned */
            }
        /* In this mode the return value is the number of values matched. */
        linesOut = valuesMatched;
        break;
    case wigOutBed:
        linesOut = wds->bedOut(wds, "stdout", TRUE);/* TRUE == sort output */
        break;
    default:
    case wigOutData:
        linesOut = wds->asciiOut(wds, database, "stdout", TRUE, FALSE);
        break;          /* TRUE == sort output, FALSE == not raw data out */
    };

wiggleDataStreamFree(&wds);

return linesOut;
}       /*      static int wigOutRegion()       */
void checkInputExists(struct sqlConnection *conn,char *database, struct chromInfo *chromInfoList, int tableCount, char *tables[]) /* check input tables/files exist, especially to handle split tables */ { char *track=NULL; int i = 0, missing=0; char t[512], *s=NULL; char table[HDB_MAX_TABLE_STRING]; char fileName[512]; boolean found = FALSE; for (i=0; i<tableCount; ++i) { struct chromInfo *cInfo; track = tables[i]; if (track[0] == '!') { ++track; } isolateTrackPartOfSpec(track, t); s = strrchr(t, '.'); if (s) { if (fileExists(t)) continue; } else { if (NULL == conn) conn = hAllocConn(database); if (sqlTableExists(conn, t)) continue; } found = FALSE; for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next) { if (inclChrom(cInfo->chrom)) { if (s) { chromFileName(t, cInfo->chrom, fileName); if (fileExists(fileName)) { found = TRUE; break; } } else { boolean hasBin; if (hFindSplitTable(database, cInfo->chrom, t, table, &hasBin)) { found = TRUE; break; } } } } if (!found) { if (s) warn("file %s not found for any chroms", t); else warn("table %s not found for any chroms", t); ++missing; } } if (missing>0) errAbort("Error: %d input table(s)/file(s) do not exist for any of the chroms specified",missing); }
void doSummaryStatsWiggle(struct sqlConnection *conn)
/* Put up page showing summary stats for wiggle track.
 * Works on the current table (curTable) over the regions from getRegions().
 * With exactly one region a single statistics table is printed after the
 * loop.  With several regions one row is printed per region as it is
 * processed, followed by a SUMMARY row computed from sums accumulated across
 * regions.  A second section reports region size, gap size and timing. */
{
// grab the right trackDb for the current table.  The curTrack variable
// has the composite trackDb in it
struct trackDb *track  = hTrackDbForTrack(database, curTable);

char *table = curTable;
struct region *region, *regionList = getRegions();
char *regionName = getRegionName();
long long regionSize = 0;
long long gapTotal = 0;
long startTime = 0, wigFetchTime = 0;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int regionCount = 0;
int regionsDone = 0;
unsigned span = 0;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
boolean hasConstraint = FALSE;
char *table2 = NULL;
boolean fullGenome = FALSE;
boolean statsHeaderDone = FALSE;
boolean gotSome = FALSE;
char *shortLabel = table;
long long statsItemCount = 0;   /*      global accumulators for overall */
int statsSpan = 0;              /*      stats summary on a multiple region */
double statsSumData = 0.0;      /*      output */
double statsSumSquares = 0.0;           /*      "  "    */
double lowerLimit = INFINITY;           /*      "  "    */
double upperLimit = -1.0 * INFINITY;    /*      "  "    */

startTime = clock1000();
/* Fall back to the table name as label when no trackDb was found. */
if (track != NULL)
     shortLabel = track->shortLabel;

/*      Count the regions, when only one, we can do more stats */
for (region = regionList; region != NULL; region = region->next)
    ++regionCount;

htmlOpen("%s (%s) Wiggle Summary Statistics", shortLabel, table);

if (anySubtrackMerge(database, curTable))
    hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this "
            "page (not implemented yet). Statistics shown here are only for "
            "the primary table %s (%s).</EM>", shortLabel, table);

fullGenome = fullGenomeRegion();

/* NOTE(review): WIG_INIT is a macro defined elsewhere that relies on the
 * exact local variable names declared above; do not rename them. */
WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

for (region = regionList; region != NULL; region = region->next)
    {
    struct bed *intersectBedList = NULL;
    int operations;

    ++regionsDone;

    if (table2)
        intersectBedList = bedTable2(conn, region, table2);

    operations = wigFetchStats;
#if defined(NOT)
    /*  can't do the histogram now, that operation times out    */
    if (1 == regionCount)
        operations |= wigFetchAscii;
#endif

    wds->setChromConstraint(wds, region->chrom);

    /* For a full-genome request the position constraint is set to 0,0
     * rather than to the region's own bounds. */
    if (fullGenome)
        wds->setPositionConstraint(wds, 0, 0);
    else
        wds->setPositionConstraint(wds, region->start, region->end);

    if (hasConstraint)
        wds->setDataConstraint(wds, dataConstraint, ll, ul);

    /* depending on what is coming in on regionList, we may need to be
     * smart about how often we call getData for these custom tracks
     * since that is potentially a large file read each time.
     */
    if (isCustom)
        {
        if (ct->dbTrack)
            {
            /* Database-backed custom track: span comes from the custom
             * trash database. */
            struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
            struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
            span = minSpan(trashConn, splitTableOrFileName, region->chrom,
                region->start, region->end, cart, tdb);
            wds->setSpanConstraint(wds, span);
            valuesMatched = getWigglePossibleIntersection(wds, region,
                CUSTOM_TRASH, table2, &intersectBedList,
                    splitTableOrFileName, operations);
            hFreeConn(&trashConn);
            }
        else
            {
            valuesMatched = getWigglePossibleIntersection(wds, region, NULL,
                table2, &intersectBedList, splitTableOrFileName, operations);

        /*  XXX We need to properly get the smallest span for custom tracks */
            /*  This is not necessarily the correct answer here */
            if (wds->stats)
                span = wds->stats->span;
            else
                span = 1;
            }
        }
    else
        {
        /* Ordinary table: nothing is fetched when it cannot be found. */
        if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
            {
            span = minSpan(conn, splitTableOrFileName, region->chrom,
                region->start, region->end, cart, track);
            wds->setSpanConstraint(wds, span);
            valuesMatched = getWigglePossibleIntersection(wds, region,
                database, table2, &intersectBedList, splitTableOrFileName,
                    operations);
            /* After an intersection the reported span is forced to 1. */
            if (intersectBedList)
                span = 1;
            }
        }
    /*  when doing multiple regions, we need to print out each result as
     *  it happens to keep the connection open to the browser and
     *  prevent any timeout since this could take a while.
     *  (worst case test is quality track on panTro1)
     */
    if (wds->stats)
        statsItemCount += wds->stats->count;
    if (wds->stats && (regionCount > 1) && (valuesMatched > 0))
        {
        /* Recover this region's sum and sum of squares from its mean and
         * variance so they can be added into the overall totals. */
        double sumData = wds->stats->mean * wds->stats->count;
        double sumSquares;

        if (wds->stats->count > 1)
            sumSquares = (wds->stats->variance * (wds->stats->count - 1)) +
                ((sumData * sumData)/wds->stats->count);
        else
            sumSquares = sumData * sumData;

        /*      global accumulators for overall summary */
        statsSpan = wds->stats->span;
        statsSumData += sumData;
        statsSumSquares += sumSquares;
        if (wds->stats->lowerLimit < lowerLimit)
            lowerLimit = wds->stats->lowerLimit;
        if ((wds->stats->lowerLimit + wds->stats->dataRange) > upperLimit)
            upperLimit = wds->stats->lowerLimit + wds->stats->dataRange;

        /* The two calls differ only in the sixth argument, which is TRUE
         * for the first row printed and FALSE afterwards. */
        if (statsHeaderDone)
            wds->statsOut(wds, database, "stdout", TRUE, TRUE, FALSE, TRUE);
        else
            {
            wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, TRUE);
            statsHeaderDone = TRUE;
            }
        wds->freeStats(wds);
        gotSome = TRUE;
        }
    if ((regionCount > MAX_REGION_DISPLAY) &&
                (regionsDone >= MAX_REGION_DISPLAY))
        {
        hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> Can not display more "
            "than %d regions, <BR> would take too much time </TH></TR>\n",
                MAX_REGION_DISPLAY);
        break;  /*      exit this for loop      */
        }
    }   /*for (region = regionList; region != NULL; region = region->next) */

if (hasConstraint)
    freeMem(dataConstraint);    /* been cloned into wds */

if (1 == regionCount)
    {
    statsPreamble(wds, regionList->chrom, regionList->start, regionList->end,
        span, valuesMatched, table2);
    /* 3 X TRUE = sort results, html table output, with header,
     *  the FALSE means close the table after printing, no more rows to
     *  come.  The case in the if() statement was already taken care of
     *  in the statsPreamble() printout.  No need to do that again.
     */

    if ( ! ((valuesMatched == 0) && table2) )
        wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, FALSE);
    regionSize = basesInRegion(regionList,0);
    gapTotal = gapsInRegion(conn, regionList,0);
    }
else
    {   /* this is a bit of a kludge here since these printouts are done in the
         *      library source wigDataStream.c statsOut() function and
         *      this is a clean up of that.  That function should be
         *      pulled out of there and made independent and more
         *      versatile.
         */
    long long realSize;
    double variance;
    double stddev;

    /*  Too expensive to lookup the numbers for thousands of regions */
    regionSize = basesInRegion(regionList,MAX_REGION_DISPLAY);
    gapTotal = gapsInRegion(conn, regionList,MAX_REGION_DISPLAY);
    realSize = regionSize - gapTotal;

    /*  close the table which was left open in the loop above   */
    if (!gotSome)
        hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> No data found matching this request </TH></TR>\n");

    hPrintf("<TR><TH ALIGN=LEFT> SUMMARY: </TH>\n");
    hPrintf("\t<TD> </TD>\n");  /*      chromStart      */
    hPrintf("\t<TD> </TD>\n");  /*      chromEnd        */
    hPrintf("\t<TD ALIGN=RIGHT> ");
    printLongWithCommas(stdout, statsItemCount);
    hPrintf(" </TD>\n" );
    hPrintf("\t<TD ALIGN=RIGHT> %d </TD>\n", statsSpan);
    hPrintf("\t<TD ALIGN=RIGHT> ");
    printLongWithCommas(stdout, statsItemCount*statsSpan);
    /* NOTE(review): realSize is not checked for zero before dividing. */
    hPrintf(" (%.2f%%) </TD>\n",
        100.0*(double)(statsItemCount*statsSpan)/(double)realSize);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", lowerLimit);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit - lowerLimit);
    if (statsItemCount > 0)
        hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", statsSumData/statsItemCount);
    else
        hPrintf("\t<TD ALIGN=RIGHT> 0.0 </TD>\n");
    stddev = 0.0;
    variance = 0.0;
    /* Sample variance from the accumulated sum and sum of squares. */
    if (statsItemCount > 1)
        {
        variance = (statsSumSquares -
            ((statsSumData * statsSumData)/(double) statsItemCount)) /
                (double) (statsItemCount - 1);
        if (variance > 0.0)
            stddev = sqrt(variance);
        }
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", variance);
    hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", stddev);
    hPrintf("</TR>\n");
    wigStatsTableHeading(stdout, TRUE);
    hPrintf("</TABLE></TD></TR></TABLE></P>\n");
    }


#if defined(NOT)
/*      can't do the histogram now, that operation times out    */
/*      Single region, we can do the histogram  */
if ((valuesMatched > 1) && (1 == regionCount))
    {
    float *valuesArray = NULL;
    size_t valueCount = 0;
    struct histoResult *histoGramResult;

    /*  convert the ascii data listings to one giant float array        */
    valuesArray = wds->asciiToDataArray(wds, valuesMatched, &valueCount);

    /*  histoGram() may return NULL if it doesn't work  */

    histoGramResult = histoGram(valuesArray, valueCount,
            NAN, (unsigned) 0, NAN, (float) wds->stats->lowerLimit,
                (float) (wds->stats->lowerLimit + wds->stats->dataRange),
                (struct histoResult *)NULL);

    printHistoGram(histoGramResult, TRUE);      /* TRUE == html output */

    freeHistoGram(&histoGramResult);
    wds->freeAscii(wds);
    wds->freeArray(wds);
    }
#endif

wds->freeStats(wds);
wiggleDataStreamFree(&wds);

/* Elapsed time covers everything from the start of this routine to here. */
wigFetchTime = clock1000() - startTime;
webNewSection("Region and Timing Statistics");
hTableStart();
stringStatRow("region", regionName);
numberStatRow("bases in region", regionSize);
numberStatRow("bases in gaps", gapTotal);
floatStatRow("load and calc time", 0.001*wigFetchTime);
wigFilterStatRow(conn);
stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
hTableEnd();
htmlClose();
}       /*      void doSummaryStatsWiggle(struct sqlConnection *conn)   */
struct bed *getWiggleAsBed(
    char *db, char *table,      /* Database and table. */
    struct region *region,      /* Region to get data for. */
    char *filter,               /* Filter to add to SQL where clause if any. */
    struct hash *idHash,        /* Restrict to id's in this hash if non-NULL. */
    struct lm *lm,              /* Where to allocate memory. */
    struct sqlConnection *conn) /* SQL connection to work with */
/* Return a bed list of all items in the given range in table.
 * Cleanup result via lmCleanup(&lm) rather than bedFreeList.  */
/* filter and idHash are currently unused, perhaps future use.  lm is used:
 * every returned bed is a clone made with lmCloneBed(bed, lm).
 * db is not read in this body; lookups use the global database.
 * conn may be NULL only for custom tracks; for a database table a NULL conn
 * causes an abort. */
{
struct bed *bedList=NULL;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchBed;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;
int maxOut;

/* NOTE(review): WIG_INIT is a macro defined elsewhere that relies on the
 * exact local variable names declared above; do not rename them. */
WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

if (hasConstraint)
    freeMem(dataConstraint);    /* been cloned into wds */

maxOut = bigFileMaxOutput();

wds->setMaxOutput(wds, maxOut);

wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);

if (table2)
    intersectBedList = bedTable2(conn, region, table2);

if (isCustom)
    {
    if (ct->dbTrack)
        {
        unsigned span = 0;
        struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
        struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
        /* NOTE(review): the span constraint is set only after the data has
         * been fetched here, unlike the database-table branch below, which
         * sets it first - confirm this ordering is intended. */
        valuesMatched = getWigglePossibleIntersection(wds, region,
            CUSTOM_TRASH, table2, &intersectBedList,
                splitTableOrFileName, operations);
        span = minSpan(trashConn, splitTableOrFileName, region->chrom,
            region->start, region->end, cart, tdb);
        wds->setSpanConstraint(wds, span);
        hFreeConn(&trashConn);
        }
    else
        /* File-backed custom track: no database, no span constraint set. */
        valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
            &intersectBedList, splitTableOrFileName, operations);
    }
else
    {
    if (conn == NULL)
        errAbort( "getWiggleAsBed: NULL conn given for database table");

    /* Nothing is fetched when the table cannot be found, which leaves
     * valuesMatched at 0 and the returned list empty. */
    if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
        {
        struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
        unsigned span = 0;

        /* XXX TBD, watch for a span limit coming in as an SQL filter */
        span = minSpan(conn, splitTableOrFileName, region->chrom,
            region->start, region->end, cart, tdb);
        wds->setSpanConstraint(wds, span);

        valuesMatched = getWigglePossibleIntersection(wds, region, database,
            table2, &intersectBedList, splitTableOrFileName, operations);
        }
    }

if (valuesMatched > 0)
    {
    struct bed *bed;

    wds->sortResults(wds);
    /* Clone each result into lm so the list outlives wds, which is freed
     * below.  Clones are prepended, then the list is reversed to restore
     * the sorted order. */
    for (bed = wds->bed; bed != NULL; bed = bed->next)
        {
        struct bed *copy = lmCloneBed(bed, lm);
        slAddHead(&bedList, copy);
        }
    slReverse(&bedList);
    }

wiggleDataStreamFree(&wds);

return bedList;
}       /*      struct bed *getWiggleAsBed()    */
void genericWiggleClick(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start) /* Display details for Wiggle data tracks. * conn may be NULL for custom tracks when from file */ { char *chrom = cartString(cart, "c"); char table[64]; boolean hasBin; unsigned span = 0; struct wiggleDataStream *wds = wiggleDataStreamNew(); unsigned long long valuesMatched = 0; struct histoResult *histoGramResult; float *valuesArray = NULL; size_t valueCount = 0; struct customTrack *ct = NULL; boolean isCustom = FALSE; int operations = wigFetchStats; /* default operation */ if (startsWith("ct_", tdb->table)) { ct = lookupCt(tdb->table); if (!ct) { warn("<P>wiggleClick: can not find custom wiggle track '%s'</P>", tdb->table); return; } if (! ct->wiggle) { warn("<P>wiggleClick: called to do stats on a custom track that isn't wiggle data ?</P>"); return; } if (ct->dbTrack) { safef(table,ArraySize(table), "%s", ct->dbTableName); span = minSpan(conn, table, chrom, winStart, winEnd, cart, tdb); } else { safef(table,ArraySize(table), "%s", ct->wigFile); span = 0; /* cause all spans to be examined */ } isCustom = TRUE; } else { hFindSplitTable(database, seqName, tdb->table, table, &hasBin); /*span = spanInUse(conn, table, chrom, winStart, winEnd, cart);*/ span = minSpan(conn, table, chrom, winStart, winEnd, cart, tdb); } /* if for some reason we don't have a chrom and win positions, this * should be run in a loop that does one chrom at a time. In the * case of hgc, there seems to be a chrom and a position. */ wds->setSpanConstraint(wds, span); wds->setChromConstraint(wds, chrom); wds->setPositionConstraint(wds, winStart, winEnd); /* If our window is less than some number of points, we can do * the histogram too. */ #define MAX_WINDOW_ALLOW_STATS 100000001 #define MAX_WINDOW_ALLOW_STRING "100,000,000" if ((winEnd - winStart) < MAX_WINDOW_ALLOW_STATS) operations |= wigFetchAscii; /* We want to also fetch the actual data values so we can run a * histogram function on them. 
You can't fetch the data in the * form of the data array since the span information is then lost. * We have to do the ascii data list format, and prepare that to * send to the histogram function. */ if (isCustom) { if (ct->dbTrack) valuesMatched = wds->getData(wds, CUSTOM_TRASH, table, operations); else valuesMatched = wds->getData(wds, (char *)NULL, table, operations); } else valuesMatched = wds->getData(wds, database, table, operations); statsPreamble(wds, chrom, winStart, winEnd, span, valuesMatched, NULL); /* output statistics table * (+sort, +html output, +with header, +close table) */ wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, FALSE); if ((winEnd - winStart) < MAX_WINDOW_ALLOW_STATS) { char *words[16]; int wordCount = 0; char *dupe = cloneString(tdb->type); double minY, maxY, tDbMinY, tDbMaxY; float hMin, hMax, hRange; wordCount = chopLine(dupe, words); wigFetchMinMaxY(tdb, &minY, &maxY, &tDbMinY, &tDbMaxY, wordCount, words); hMin = min(minY,tDbMinY); hMax = max(maxY,tDbMaxY); hRange = hMax - hMin; /* convert the ascii data listings to one giant float array */ valuesArray = wds->asciiToDataArray(wds, valuesMatched, &valueCount); /* let's see if we really want to use the range from the track type * line, or the actual range in this data. 
If there is a good * actual range in the data, use that instead */ if (hRange > 0.0) { if (wds->stats->dataRange != 0) hRange = 0.0; } /* If we have a valid range, use a specified 20 bin histogram * NOTE: pass 21 as binCount to get a 20 bin histogram */ if (hRange > 0.0) histoGramResult = histoGram(valuesArray, valueCount, (hRange/20.0), (unsigned) 21, hMin, hMin, hMax, (struct histoResult *)NULL); else histoGramResult = histoGram(valuesArray, valueCount, NAN, (unsigned) 0, NAN, (float) wds->stats->lowerLimit, (float) (wds->stats->lowerLimit + wds->stats->dataRange), (struct histoResult *)NULL); /* histoGram() may return NULL if it doesn't work, that's OK, the * print out will indicate no results (TRUE == html output) */ printHistoGram(histoGramResult, TRUE); freeHistoGram(&histoGramResult); freeMem(valuesArray); } else { printf("<P>(viewing windows of fewer than %s bases will also" " display a histogram)</P>\n", MAX_WINDOW_ALLOW_STRING); } wiggleDataStreamFree(&wds); }