void wigFilterStatRow(struct sqlConnection *conn)
/* Emit the "filter" row of a statistics table.  The value cell holds
 * whatever wigShowFilter prints when any filter is set, and the word
 * "off" otherwise. */
{
hPrintf("<TR><TD>filter</TD><TD ALIGN=RIGHT>");
if (!anyFilter())
    hPrintf("off");
else
    wigShowFilter(conn);
hPrintf("</TD></TR>\n");
}
int bigWigOutRegion(char *table, struct sqlConnection *conn, struct region *region, int maxOut, enum wigOutputType wigOutType) /* Write out bigWig for region, doing intersecting and filtering as need be. */ { boolean isMerged = anySubtrackMerge(table, database); int resultCount = 0; char *wigFileName = bigWigFileName(table, conn); if (wigFileName) { struct bbiFile *bwf = bigWigFileOpen(wigFileName); if (bwf) { /* Easy case, just dump out data. */ if (!anyFilter() && !anyIntersection() && !isMerged && wigOutType == wigOutData) resultCount = bigWigIntervalDump(bwf, region->chrom, region->start, region->end, maxOut, stdout); /* Pretty easy case, still do it ourselves. */ else if (!isMerged && wigOutType == wigOutData) { double ll, ul; enum wigCompare cmp; getWigFilter(database, curTable, &cmp, &ll, &ul); struct lm *lm = lmInit(0); struct bbiInterval *ivList, *iv; ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm); for (iv=ivList; iv != NULL && resultCount < maxOut; iv = iv->next, ++resultCount) { fprintf(stdout, "%s\t%d\t%d\t%g\n", region->chrom, iv->start, iv->end, iv->val); } lmCleanup(&lm); } /* Harder cases - resort to making a data vector and letting that machinery handle it. */ else { struct dataVector *dv = bigWigDataVector(table, conn, region); resultCount = wigPrintDataVectorOut(dv, wigOutType, maxOut, NULL); dataVectorFree(&dv); } } bbiFileClose(&bwf); } freeMem(wigFileName); return resultCount; }
void bigBedTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f) /* Print out selected fields from Big Bed. If fields is NULL, then print out all fields. */ { if (f == NULL) f = stdout; /* Convert comma separated list of fields to array. */ int fieldCount = chopByChar(fields, ',', NULL, 0); char **fieldArray; AllocArray(fieldArray, fieldCount); chopByChar(fields, ',', fieldArray, fieldCount); /* Get list of all fields in big bed and turn it into a hash of column indexes keyed by * column name. */ struct hash *fieldHash = hashNew(0); struct slName *bb, *bbList = bigBedGetFields(table, conn); int i; for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i) hashAddInt(fieldHash, bb->name, i); // If bigBed has name column, look up pasted/uploaded identifiers if any: struct hash *idHash = NULL; if (slCount(bbList) >= 4) idHash = identifierHash(db, table); /* Create an array of column indexes corresponding to the selected field list. */ int *columnArray; AllocArray(columnArray, fieldCount); for (i=0; i<fieldCount; ++i) { columnArray[i] = hashIntVal(fieldHash, fieldArray[i]); } /* Output row of labels */ fprintf(f, "#%s", fieldArray[0]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", fieldArray[i]); fprintf(f, "\n"); /* Open up bigBed file. 
*/ char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); struct asFilter *filter = NULL; if (anyFilter()) { filter = asFilterFromCart(cart, db, table, as); if (filter) { fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList)); } } /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(0); struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, region->chrom, region->start, region->end, 0, lm); char *row[bbi->fieldCount]; char startBuf[16], endBuf[16]; for (iv = ivList; iv != NULL; iv = iv->next) { bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount); if (asFilterOnRow(filter, row)) { if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL)) continue; int i; fprintf(f, "%s", row[columnArray[0]]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", row[columnArray[i]]); fprintf(f, "\n"); } } lmCleanup(&lm); } /* Clean up and exit. */ bbiFileClose(&bbi); hashFree(&fieldHash); freeMem(fieldArray); freeMem(columnArray); }
void bamTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f) /* Print out selected fields from BAM. If fields is NULL, then print out all fields. */ { struct hTableInfo *hti = NULL; hti = getHti(db, table, conn); struct hash *idHash = NULL; char *idField = getIdField(db, curTrack, table, hti); int idFieldNum = 0; /* if we know what field to use for the identifiers, get the hash of names */ if (idField != NULL) idHash = identifierHash(db, table); if (f == NULL) f = stdout; /* Convert comma separated list of fields to array. */ int fieldCount = chopByChar(fields, ',', NULL, 0); char **fieldArray; AllocArray(fieldArray, fieldCount); chopByChar(fields, ',', fieldArray, fieldCount); /* Get list of all fields in big bed and turn it into a hash of column indexes keyed by * column name. */ struct hash *fieldHash = hashNew(0); struct slName *bb, *bbList = bamGetFields(); int i; for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i) { /* if we know the field for identifiers, save it away */ if ((idField != NULL) && sameString(idField, bb->name)) idFieldNum = i; hashAddInt(fieldHash, bb->name, i); } /* Create an array of column indexes corresponding to the selected field list. 
*/ int *columnArray; AllocArray(columnArray, fieldCount); for (i=0; i<fieldCount; ++i) { columnArray[i] = hashIntVal(fieldHash, fieldArray[i]); } /* Output row of labels */ fprintf(f, "#%s", fieldArray[0]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", fieldArray[i]); fprintf(f, "\n"); struct asObject *as = bamAsObj(); struct asFilter *filter = NULL; if (anyFilter()) { filter = asFilterFromCart(cart, db, table, as); if (filter) { fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList)); } } /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); int maxOut = bigFileMaxOutput(); for (region = regionList; region != NULL && (maxOut > 0); region = region->next) { struct lm *lm = lmInit(0); char *fileName = bamFileName(table, conn, region->chrom); struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName, region->chrom, region->start, region->end, lm); char *row[SAMALIGNMENT_NUM_COLS]; char numBuf[BAM_NUM_BUF_SIZE]; for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next) { samAlignmentToRow(sam, numBuf, row); if (asFilterOnRow(filter, row)) { /* if we're looking for identifiers, check if this matches */ if ((idHash != NULL)&&(hashLookup(idHash, row[idFieldNum]) == NULL)) continue; int i; fprintf(f, "%s", row[columnArray[0]]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", row[columnArray[i]]); fprintf(f, "\n"); maxOut --; } } freeMem(fileName); lmCleanup(&lm); } if (maxOut == 0) warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); /* Clean up and exit. */ hashFree(&fieldHash); freeMem(fieldArray); freeMem(columnArray); }
void doSummaryStatsBed(struct sqlConnection *conn) /* Put up page showing summary stats for track that is in database * or that is bed-format custom. */ { struct bed *bedList = NULL; struct region *regionList = getRegions(), *region; char *regionName = getRegionName(); long long regionSize = 0, gapTotal = 0, realSize = 0; long startTime, midTime, endTime; long loadTime = 0, calcTime = 0, freeTime = 0; struct covStats *itemCovList = NULL, *blockCovList = NULL, *cov; int itemCount = 0; struct hTableInfo *hti = getHti(database, curTable, conn); int minScore = BIGNUM, maxScore = -BIGNUM; long long sumScores = 0; boolean hasBlocks = hti->hasBlocks; boolean hasScore = (hti->scoreField[0] != 0); int fieldCount; htmlOpen("%s (%s) Summary Statistics", curTableLabel(), curTable); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(64*1024); startTime = clock1000(); bedList = cookedBedList(conn, curTable, region, lm, &fieldCount); if (fieldCount < 12) hasBlocks = FALSE; if (fieldCount < 5) hasScore = FALSE; midTime = clock1000(); loadTime += midTime - startTime; if (bedList != NULL) { itemCount += slCount(bedList); regionSize += region->end - region->start; cov = calcSpanOverRegion(region, bedList); slAddHead(&itemCovList, cov); if (hasBlocks) { cov = calcBlocksOverRegion(region, bedList); slAddHead(&blockCovList, cov); } if (hti->scoreField[0] != 0) { struct bed *bed; for (bed = bedList; bed != NULL; bed = bed->next) { int score = bed->score; if (score < minScore) minScore = score; if (score > maxScore) maxScore = score; sumScores += score; } } } endTime = clock1000(); calcTime += endTime - midTime; lmCleanup(&lm); bedList = NULL; freeTime += clock1000() - endTime; } regionSize = basesInRegion(regionList, 0); gapTotal = gapsInRegion(conn, regionList, 0); realSize = regionSize - gapTotal; hTableStart(); startTime = clock1000(); numberStatRow("item count", itemCount); if (itemCount > 0) { cov = covStatsSum(itemCovList); 
percentStatRow("item bases", cov->basesCovered, realSize); percentStatRow("item total", cov->sumBases, realSize); numberStatRow("smallest item", cov->minBases); numberStatRow("average item", round((double)cov->sumBases/cov->itemCount)); numberStatRow("biggest item", cov->maxBases); } if (hasBlocks && itemCount > 0) { cov = covStatsSum(blockCovList); hPrintf("<TR><TD>block count</TD><TD ALIGN=RIGHT>"); printLongWithCommas(stdout, cov->itemCount); hPrintf("</TD></TR>\n"); percentStatRow("block bases", cov->basesCovered, realSize); percentStatRow("block total", cov->sumBases, realSize); numberStatRow("smallest block", cov->minBases); numberStatRow("average block", round((double)cov->sumBases/cov->itemCount)); numberStatRow("biggest block", cov->maxBases); } if (hasScore != 0 && itemCount > 0 && sumScores != 0) { numberStatRow("smallest score", minScore); numberStatRow("average score", round((double)sumScores/itemCount)); numberStatRow("biggest score", maxScore); } hTableEnd(); /* Show region and time stats part of stats page. */ webNewSection("Region and Timing Statistics"); hTableStart(); stringStatRow("region", regionName); numberStatRow("bases in region", regionSize); numberStatRow("bases in gaps", gapTotal); floatStatRow("load time", 0.001*loadTime); floatStatRow("calculation time", 0.001*calcTime); floatStatRow("free memory time", 0.001*freeTime); stringStatRow("filter", (anyFilter() ? "on" : "off")); stringStatRow("intersection", (anyIntersection() ? "on" : "off")); hTableEnd(); covStatsFreeList(&itemCovList); covStatsFreeList(&blockCovList); htmlClose(); }
char *filterClause(char *db, char *table, char *chrom, char *extraClause) /* Get filter clause (something to put after 'where') * for table */ { struct sqlConnection *conn = NULL; char varPrefix[128]; int varPrefixSize, fieldNameSize; struct hashEl *varList, *var; struct dyString *dy = NULL; boolean needAnd = FALSE; char oldDb[128]; char dbTableBuf[256]; char explicitDb[128]; char splitTable[256]; char explicitDbTable[512]; /* Return just extraClause (which may be NULL) if no filter on us. */ if (! (anyFilter() && filteredOrLinked(db, table))) return cloneString(extraClause); safef(oldDb, sizeof(oldDb), "%s", db); dbOverrideFromTable(dbTableBuf, &db, &table); if (!sameString(oldDb, db)) safef(explicitDb, sizeof(explicitDb), "%s.", db); else explicitDb[0] = 0; /* Cope with split table and/or custom tracks. */ if (isCustomTrack(table)) { conn = hAllocConn(CUSTOM_TRASH); struct customTrack *ct = ctLookupName(table); safef(explicitDbTable, sizeof(explicitDbTable), "%s", ct->dbTableName); } else { conn = hAllocConn(db); safef(splitTable, sizeof(splitTable), "%s_%s", chrom, table); if (!sqlTableExists(conn, splitTable)) safef(splitTable, sizeof(splitTable), "%s", table); safef(explicitDbTable, sizeof(explicitDbTable), "%s%s", explicitDb, splitTable); } /* Get list of filter variables for this table. */ safef(varPrefix, sizeof(varPrefix), "%s%s.%s.", hgtaFilterVarPrefix, db, table); varPrefixSize = strlen(varPrefix); varList = cartFindPrefix(cart, varPrefix); if (varList == NULL) { hFreeConn(&conn); return cloneString(extraClause); } /* Create filter clause string, stepping through vars. */ dy = dyStringNew(0); for (var = varList; var != NULL; var = var->next) { /* Parse variable name into field and type. 
*/ char field[64], *s, *type; s = var->name + varPrefixSize; type = strchr(s, '.'); if (type == NULL) internalErr(); fieldNameSize = type - s; if (fieldNameSize >= sizeof(field)) internalErr(); memcpy(field, s, fieldNameSize); field[fieldNameSize] = 0; sqlCkId(field); type += 1; /* rawLogic and rawQuery are handled below; * filterMaxOutputVar is not really a filter variable and is handled * in wiggle.c. */ if (startsWith("raw", type) || sameString(filterMaxOutputVar, type)) continue; /* Any other variables that are missing a name: * <varPrefix>..<type> * are illegal */ if (fieldNameSize < 1) { warn("Missing name in cart variable: %s\n", var->name); continue; } if (sameString(type, filterDdVar)) { char *patVar = filterPatternVarName(db, table, field); struct slName *patList = cartOptionalSlNameList(cart, patVar); normalizePatList(&patList); if (slCount(patList) > 0) { char *ddVal = cartString(cart, var->name); boolean neg = sameString(ddVal, ddOpMenu[1]); char *fieldType = getSqlType(conn, explicitDbTable, field); boolean needOr = FALSE; if (needAnd) dyStringAppend(dy, " and "); needAnd = TRUE; if (neg) dyStringAppend(dy, "not "); boolean composite = (slCount(patList) > 1); if (composite || neg) dyStringAppendC(dy, '('); struct slName *pat; for (pat = patList; pat != NULL; pat = pat->next) { char *sqlPat = sqlLikeFromWild(pat->name); if (needOr) dyStringAppend(dy, " OR "); needOr = TRUE; if (isSqlSetType(fieldType)) { sqlDyStringPrintfFrag(dy, "FIND_IN_SET('%s', %s.%s)>0 ", sqlPat, explicitDbTable , field); } else { sqlDyStringPrintfFrag(dy, "%s.%s ", explicitDbTable, field); if (sqlWildcardIn(sqlPat)) dyStringAppend(dy, "like "); else dyStringAppend(dy, "= "); sqlDyStringPrintf(dy, "'%s'", sqlPat); } freez(&sqlPat); } if (composite || neg) dyStringAppendC(dy, ')'); } } else if (sameString(type, filterCmpVar)) { char *patVar = filterPatternVarName(db, table, field); char *pat = trimSpaces(cartOptionalString(cart, patVar)); char *cmpVal = cartString(cart, var->name); 
if (cmpReal(pat, cmpVal)) { if (needAnd) dyStringAppend(dy, " and "); needAnd = TRUE; if (sameString(cmpVal, "in range")) { char *words[2]; int wordCount; char *dupe = cloneString(pat); wordCount = chopString(dupe, ", \t\n", words, ArraySize(words)); if (wordCount < 2) /* Fake short input */ words[1] = "2000000000"; if (strchr(pat, '.')) /* Assume floating point */ { double a = atof(words[0]), b = atof(words[1]); sqlDyStringPrintfFrag(dy, "%s.%s >= %f && %s.%s <= %f", explicitDbTable, field, a, explicitDbTable, field, b); } else { int a = atoi(words[0]), b = atoi(words[1]); sqlDyStringPrintfFrag(dy, "%s.%s >= %d && %s.%s <= %d", explicitDbTable, field, a, explicitDbTable, field, b); } freez(&dupe); } else { // cmpVal has been checked already above in cmpReal for legal values. sqlDyStringPrintfFrag(dy, "%s.%s %-s ", explicitDbTable, field, cmpVal); if (strchr(pat, '.')) /* Assume floating point. */ dyStringPrintf(dy, "%f", atof(pat)); else dyStringPrintf(dy, "%d", atoi(pat)); } } } } /* Handle rawQuery if any */ { char *varName; char *logic, *query; varName = filterFieldVarName(db, table, "", filterRawLogicVar); logic = cartUsualString(cart, varName, logOpMenu[0]); varName = filterFieldVarName(db, table, "", filterRawQueryVar); query = trimSpaces(cartOptionalString(cart, varName)); if (query != NULL && query[0] != 0) { if (needAnd) dyStringPrintf(dy, " %s ", logic); sqlSanityCheckWhere(query, dy); } } /* Clean up and return */ hFreeConn(&conn); hashElFreeList(&varList); if (dy->stringSize == 0) { dyStringFree(&dy); return cloneString(extraClause); } else { if (isNotEmpty(extraClause)) dyStringPrintf(dy, " and %s", extraClause); return dyStringCannibalize(&dy); } }
struct joinerDtf *filteringTables() /* Get list of tables we're filtering on as joinerDtf list (with * the field entry NULL). */ { if (!anyFilter()) return NULL; else { struct joinerDtf *dtfList = NULL, *dtf; struct hashEl *varList, *var; struct hash *uniqHash = hashNew(0); int prefixSize = strlen(hgtaFilterVarPrefix); varList = cartFindPrefix(cart, hgtaFilterVarPrefix); for (var = varList; var != NULL; var = var->next) { char *dupe = cloneString(var->name + prefixSize); char *parts[5]; int partCount; char dbTable[256]; char *db, *table, *field, *type; partCount = chopByChar(dupe, '.', parts, ArraySize(parts)); if (partCount != 4) { warn("Part count != expected 4 line %d of %s", __LINE__, __FILE__); continue; } db = parts[0]; table = parts[1]; field = parts[2]; type = parts[3]; safef(dbTable, sizeof(dbTable), "%s.%s", db, table); if (! filteredOrLinked(db, table)) continue; if (!hashLookup(uniqHash, dbTable)) { boolean gotFilter = FALSE; if (sameString(type, filterPatternVar)) { char *pat = trimSpaces(var->val); gotFilter = wildReal(pat); } else if (sameString(type, filterCmpVar)) { char *patVar = filterPatternVarName(db, table, field); char *pat = trimSpaces(cartOptionalString(cart, patVar)); gotFilter = cmpReal(pat, var->val); } else if (sameString(type, filterRawQueryVar)) { char *pat = trimSpaces(var->val); gotFilter = (pat != NULL && pat[0] != 0); } if (gotFilter) { hashAdd(uniqHash, dbTable, NULL); AllocVar(dtf); dtf->database = cloneString(db); dtf->table = cloneString(table); slAddHead(&dtfList, dtf); } } freeMem(dupe); } hashFree(&uniqHash); return dtfList; } }
void showMainControlTable(struct sqlConnection *conn) /* Put up table with main controls for main page. */ { struct grp *selGroup; boolean isWig = FALSE, isPositional = FALSE, isMaf = FALSE, isBedGr = FALSE, isChromGraphCt = FALSE, isPal = FALSE, isArray = FALSE, isBam = FALSE, isVcf = FALSE, isHalSnake = FALSE, isLongTabix = FALSE; boolean gotClade = hGotClade(); struct hTableInfo *hti = NULL; hPrintf("<TABLE BORDER=0>\n"); /* Print clade, genome and assembly line. */ { if (gotClade) { hPrintf("<TR><TD><B>clade:</B>\n"); printCladeListHtml(hGenome(database), onChangeClade()); nbSpaces(3); hPrintf("<B>genome:</B>\n"); printGenomeListForCladeHtml(database, onChangeOrg()); } else { hPrintf("<TR><TD><B>genome:</B>\n"); printGenomeListHtml(database, onChangeOrg()); } nbSpaces(3); hPrintf("<B>assembly:</B>\n"); printAssemblyListHtml(database, onChangeDb()); hPrintf("</TD></TR>\n"); } /* Print group and track line. */ { hPrintf("<TR><TD>"); selGroup = showGroupField(hgtaGroup, onChangeGroupOrTrack(), conn, hAllowAllTables()); nbSpaces(3); curTrack = showTrackField(selGroup, hgtaTrack, onChangeGroupOrTrack(), FALSE); nbSpaces(3); boolean hasCustomTracks = FALSE; struct trackDb *t; for (t = fullTrackList; t != NULL; t = t->next) { if (isCustomTrack(t->table)) { hasCustomTracks = TRUE; break; } } hOnClickButton("document.customTrackForm.submit();return false;", hasCustomTracks ? CT_MANAGE_BUTTON_LABEL : CT_ADD_BUTTON_LABEL); hPrintf(" "); if (hubConnectTableExists()) hOnClickButton("document.trackHubForm.submit();return false;", "track hubs"); hPrintf("</TD></TR>\n"); } /* Print table line. 
*/ { hPrintf("<TR><TD>"); curTable = showTableField(curTrack, hgtaTable, TRUE); if (isHubTrack(curTable) || (strchr(curTable, '.') == NULL)) /* In same database */ { hti = getHti(database, curTable, conn); isPositional = htiIsPositional(hti); } isLongTabix = isLongTabixTable( curTable); isBam = isBamTable( curTable); isVcf = isVcfTable(curTable, NULL); isWig = isWiggle(database, curTable); if (isBigWigTable(curTable)) { isPositional = TRUE; isWig = TRUE; } isHalSnake = isHalTable( curTable); isMaf = isMafTable(database, curTrack, curTable); isBedGr = isBedGraph(curTable); isArray = isMicroarray(curTrack, curTable); struct trackDb *tdb = findTdbForTable(database, curTrack, curTable, ctLookupName); isPal = isPalCompatible(conn, tdb, curTable); nbSpaces(1); if (isCustomTrack(curTable)) { isChromGraphCt = isChromGraph(tdb); } cgiMakeButton(hgtaDoSchema, "describe table schema"); hPrintf("</TD></TR>\n"); } if (curTrack == NULL) { struct trackDb *tdb = hTrackDbForTrack(database, curTable); struct trackDb *cTdb = hCompositeTrackDbForSubtrack(database, tdb); if (cTdb) curTrack = cTdb; else curTrack = tdb; isMaf = isMafTable(database, curTrack, curTable); } /* Region line */ { char *regionType = cartUsualString(cart, hgtaRegionType, hgtaRegionTypeGenome); char *range = cartUsualString(cart, hgtaRange, ""); if (isPositional) { boolean doEncode = FALSE; if (!trackHubDatabase(database)) doEncode = sqlTableExists(conn, "encodeRegions"); hPrintf("<TR><TD><B>region:</B>\n"); /* If regionType not allowed force it to "genome". */ if ((sameString(regionType, hgtaRegionTypeUserRegions) && userRegionsFileName() == NULL) || (sameString(regionType, hgtaRegionTypeEncode) && !doEncode)) regionType = hgtaRegionTypeGenome; // Is "genome" is not allowed because of tdb 'tableBrowser noGenome'? 
boolean disableGenome = ((curTrack && cartTrackDbIsNoGenome(database, curTrack->table)) || (curTable && cartTrackDbIsNoGenome(database, curTable))); // If "genome" is selected but not allowed, force it to "range": if (sameString(regionType, hgtaRegionTypeGenome) && disableGenome) regionType = hgtaRegionTypeRange; jsTrackingVar("regionType", regionType); if (disableGenome) { makeRegionButtonExtraHtml(hgtaRegionTypeGenome, regionType, "DISABLED"); hPrintf(" <span"NO_GENOME_CLASS">genome (unavailable for selected track)</span>" " "); } else { makeRegionButton(hgtaRegionTypeGenome, regionType); hPrintf(" genome "); } if (doEncode) { makeRegionButton(hgtaRegionTypeEncode, regionType); hPrintf(" ENCODE Pilot regions "); } makeRegionButton(hgtaRegionTypeRange, regionType); hPrintf(" position "); hPrintf("<INPUT TYPE=TEXT NAME=\"%s\" SIZE=26 VALUE=\"%s\" onFocus=\"%s\">\n", hgtaRange, range, jsRadioUpdate(hgtaRegionType, "regionType", "range")); cgiMakeButton(hgtaDoLookupPosition, "lookup"); hPrintf(" "); if (userRegionsFileName() != NULL) { makeRegionButton(hgtaRegionTypeUserRegions, regionType); hPrintf(" defined regions "); cgiMakeButton(hgtaDoSetUserRegions, "change"); hPrintf(" "); cgiMakeButton(hgtaDoClearUserRegions, "clear"); } else cgiMakeButton(hgtaDoSetUserRegions, "define regions"); hPrintf("</TD></TR>\n"); } else { /* Need to put at least stubs of cgi variables in for JavaScript to work. */ jsTrackingVar("regionType", regionType); cgiMakeHiddenVar(hgtaRange, range); cgiMakeHiddenVar(hgtaRegionType, regionType); } /* Select identifiers line (if applicable). 
*/ if (!isWig && getIdField(database, curTrack, curTable, hti) != NULL) { hPrintf("<TR><TD><B>identifiers (names/accessions):</B>\n"); cgiMakeButton(hgtaDoPasteIdentifiers, "paste list"); hPrintf(" "); cgiMakeButton(hgtaDoUploadIdentifiers, "upload list"); if (identifierFileName() != NULL) { hPrintf(" "); cgiMakeButton(hgtaDoClearIdentifiers, "clear list"); } hPrintf("</TD></TR>\n"); } } /* microarray options */ /* button for option page here (median/log-ratio, etc) */ /* Filter line. */ { hPrintf("<TR><TD><B>filter:</B>\n"); if (anyFilter()) { cgiMakeButton(hgtaDoFilterPage, "edit"); hPrintf(" "); cgiMakeButton(hgtaDoClearFilter, "clear"); if (isWig || isBedGr) wigShowFilter(conn); } else { cgiMakeButton(hgtaDoFilterPage, "create"); } hPrintf("</TD></TR>\n"); } /* Composite track subtrack merge line. */ boolean canSubtrackMerge = (curTrack && tdbIsComposite(curTrack) && !isBam && !isVcf && !isLongTabix); if (canSubtrackMerge) { hPrintf("<TR><TD><B>subtrack merge:</B>\n"); if (anySubtrackMerge(database, curTable)) { cgiMakeButton(hgtaDoSubtrackMergePage, "edit"); hPrintf(" "); cgiMakeButton(hgtaDoClearSubtrackMerge, "clear"); } else { cgiMakeButton(hgtaDoSubtrackMergePage, "create"); } hPrintf("</TD></TR>\n"); } /* Intersection line. */ if (isPositional) { if (anyIntersection()) { hPrintf("<TR><TD><B>intersection with %s:</B>\n", cartString(cart, hgtaIntersectTable)); cgiMakeButton(hgtaDoIntersectPage, "edit"); hPrintf(" "); cgiMakeButton(hgtaDoClearIntersect, "clear"); hPrintf("</TD></TR>\n"); } else if (canIntersect(database, curTable)) { hPrintf("<TR><TD><B>intersection:</B>\n"); cgiMakeButton(hgtaDoIntersectPage, "create"); hPrintf("</TD></TR>\n"); } } /* Correlation line. 
*/ struct trackDb *tdb = findTdbForTable(database, curTrack, curTable, ctLookupName); if (correlateTrackTableOK(tdb, curTable)) { char *table2 = cartUsualString(cart, hgtaCorrelateTable, "none"); hPrintf("<TR><TD><B>correlation:</B>\n"); if (differentWord(table2, "none") && strlen(table2) && ! isNoGenomeDisabled(database, table2)) { struct grp *groupList = fullGroupList; struct grp *selGroup = findSelectedGroup(groupList, hgtaCorrelateGroup); struct trackDb *tdb2 = findSelectedTrack(fullTrackList, selGroup,hgtaCorrelateTrack); if (tdbIsComposite(tdb2)) { struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(tdb2->subtracks); struct slRef *tdbRef; for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next) { struct trackDb *subTdb = tdbRef->val; if (sameString(table2, subTdb->table)) { tdb2 = subTdb; break; } } slFreeList(&tdbRefList); } cgiMakeButton(hgtaDoCorrelatePage, "calculate"); cgiMakeButton(hgtaDoClearCorrelate, "clear"); if (tdb2 && tdb2->shortLabel) hPrintf(" (with: %s)", tdb2->shortLabel); #ifdef NOT_YET /* debugging dbg vvvvv */ if (curTrack && curTrack->type) /* dbg */ { hPrintf("<BR> (debug: '%s', '%s(%s)')", curTrack->type, tdb2->type, table2); } /* debugging debug ^^^^^ */ #endif } else cgiMakeButton(hgtaDoCorrelatePage, "create"); hPrintf("</TD></TR>\n"); } /* Print output type line. */ showOutputTypeRow(isWig, isBedGr, isPositional, isMaf, isChromGraphCt, isPal, isArray, isHalSnake); /* Print output destination line. 
*/ { char *compressType = cartUsualString(cart, hgtaCompressType, textOutCompressNone); char *fileName = cartUsualString(cart, hgtaOutFileName, ""); hPrintf("<TR><TD>\n"); hPrintf("<B>output file:</B> "); cgiMakeTextVar(hgtaOutFileName, fileName, 29); hPrintf(" (leave blank to keep output in browser)</TD></TR>\n"); hPrintf("<TR><TD>\n"); hPrintf("<B>file type returned: </B>"); cgiMakeRadioButton(hgtaCompressType, textOutCompressNone, sameWord(textOutCompressNone, compressType)); hPrintf(" plain text  "); cgiMakeRadioButton(hgtaCompressType, textOutCompressGzip, sameWord(textOutCompressGzip, compressType)); hPrintf(" gzip compressed"); hPrintf("</TD></TR>\n"); } hPrintf("</TABLE>\n"); /* Submit buttons. */ { hPrintf("<BR>\n"); if (isWig || isBam || isVcf || isLongTabix) { char *name; extern char *maxOutMenu[]; char *maxOutput = maxOutMenu[0]; if (isCustomTrack(curTable)) name=filterFieldVarName("ct", curTable, "_", filterMaxOutputVar); else name=filterFieldVarName(database,curTable, "_",filterMaxOutputVar); maxOutput = cartUsualString(cart, name, maxOutMenu[0]); if (isWig) hPrintf( "<I>Note: to return more than %s lines, change the filter setting" " (above). The entire data set may be available for download as" " a very large file that contains the original data values (not" " compressed into the wiggle format) -- see the Downloads page." "</I><BR>", maxOutput); else if (isBam || isVcf || isLongTabix) hPrintf( "<I>Note: to return more than %s lines, change the filter setting" " (above). Please consider downloading the entire data from our Download pages." "</I><BR>", maxOutput); } else if (anySubtrackMerge(database, curTable) || anyIntersection()) { hPrintf("<I>Note: The all fields and selected fields output formats " "are not available when a%s has been specified.</I><BR>", canSubtrackMerge ? 
" subtrack merge or intersection" : "n intersection"); } cgiMakeButton(hgtaDoTopSubmit, "get output"); hPrintf(" "); if (isPositional || isWig) { cgiMakeButton(hgtaDoSummaryStats, "summary/statistics"); hPrintf(" "); } #ifdef SOMETIMES hPrintf(" "); cgiMakeButton(hgtaDoTest, "test"); #endif /* SOMETIMES */ } hPrintf("<P>" "To reset <B>all</B> user cart settings (including custom tracks), \n" "<A HREF=\"/cgi-bin/cartReset?destination=%s\">click here</A>.\n", getScriptName()); }
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
               boolean isTabix)
/* Write the requested VCF fields as tab-separated text.
 *   fields  - comma separated field names; it is handed directly to
 *             chopByChar (NOTE(review): the older comment promised that
 *             NULL prints all fields - confirm against callers).
 *   f       - output file, stdout when NULL.
 *   isTabix - selects vcfTabixFileMayOpen over vcfFileMayOpen.
 * The file's own header is echoed before the first data line.  Output
 * stops once bigFileMaxOutput() lines have been written, and a warning is
 * issued when that limit is reached exactly. */
{
struct hTableInfo *hti = getHti(db, table, conn);
char *idField = getIdField(db, curTrack, table, hti);
struct hash *idHash = NULL;
int idFieldNum = 0;

/* The identifier hash is only fetched when an id field is known. */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Split the comma separated field list into an array of names. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Hash every VCF field name to its column index, noting which column
 * holds the identifiers along the way. */
struct hash *fieldHash = hashNew(0);
struct slName *vcfFieldList = vcfGetFields();
struct slName *vcfField;
int colIx = 0;
for (vcfField = vcfFieldList; vcfField != NULL; vcfField = vcfField->next)
    {
    if (idField != NULL && sameString(idField, vcfField->name))
        idFieldNum = colIx;
    hashAddInt(fieldHash, vcfField->name, colIx);
    ++colIx;
    }

/* Translate the selected names into column indexes. */
int *columnArray;
int i;
AllocArray(columnArray, fieldCount);
for (i = 0; i < fieldCount; ++i)
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);

// A subset of columns is no longer valid VCF, so say so up front.
boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
if (!allFields)
    fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");

struct asObject *as = vcfAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    filter = asFilterFromCart(cart, db, table, as);

struct region *regionList = getRegions();
int maxOut = bigFileMaxOutput();
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);

// The VCF header goes out once, ahead of the first region's records.
boolean printedHeader = FALSE;

// Scratch strings reused for every record turned into a row:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);

/* Walk the regions until they run out or the output budget is spent. */
struct region *region = regionList;
while (region != NULL && maxOut > 0)
    {
    char *fileName = vcfFileName(tdb, conn, table, region->chrom);
    struct vcfFile *vcff = NULL;
    if (isTabix)
        vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
                                   100, maxOut);
    else
        vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
                              100, maxOut, TRUE);
    if (vcff == NULL)
        noWarnAbort();

    // With all fields requested but no genotypes in the file, the last
    // two (genotype) columns are left out:
    if (allFields && vcff->genotypeCount == 0)
        fieldCount = VCFDATALINE_NUM_COLS - 2;

    if (!printedHeader)
        {
        fprintf(f, "%s", vcff->headerString);
        if (filter)
            fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
        if (!allFields)
            {
            fprintf(f, "#%s", fieldArray[0]);
            for (i = 1; i < fieldCount; ++i)
                fprintf(f, "\t%s", fieldArray[i]);
            fprintf(f, "\n");
            }
        printedHeader = TRUE;
        }

    char *row[VCFDATALINE_NUM_COLS];
    char numBuf[VCF_NUM_BUF_SIZE];
    struct vcfRecord *rec;
    for (rec = vcff->records; rec != NULL && maxOut > 0; rec = rec->next)
        {
        vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
        if (!asFilterOnRow(filter, row))
            continue;
        /* When an identifier list is active, skip rows not on it. */
        if (idHash != NULL && hashLookup(idHash, row[idFieldNum]) == NULL)
            continue;
        // For all-fields output, put back the chrom as the VCF had it
        // once filtering is done:
        if (allFields && !sameString(rec->chrom, region->chrom))
            row[0] = rec->chrom;
        fprintf(f, "%s", row[columnArray[0]]);
        for (i = 1; i < fieldCount; ++i)
            fprintf(f, "\t%s", row[columnArray[i]]);
        fprintf(f, "\n");
        --maxOut;
        }
    vcfFileFree(&vcff);
    freeMem(fileName);
    region = region->next;
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());

/* Release what was allocated above. */
dyStringFree(&dyAlt);
dyStringFree(&dyFilter);
dyStringFree(&dyInfo);
dyStringFree(&dyGt);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}
void doSummaryStatsBigWig(struct sqlConnection *conn) /* Put up page showing summary stats for bigWig track. */ { struct trackDb *track = curTrack; char *table = curTable; char *shortLabel = (track == NULL ? table : track->shortLabel); char *fileName = bigWigFileName(table, conn); long startTime = clock1000(); htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table); if (anySubtrackMerge(database, curTable)) hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this " "page (not implemented yet). Statistics shown here are only for " "the primary table %s (%s).</EM>", shortLabel, table); struct bbiFile *bwf = bigWigFileOpen(fileName); struct region *region, *regionList = getRegions(); double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0; bits64 validCount = 0; if (!anyFilter() && !anyIntersection()) { for (region = regionList; region != NULL; region = region->next) { struct bbiSummaryElement sum; if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end, bigWigIntervalQuery, 1, &sum)) { if (validCount == 0) { minVal = sum.minVal; maxVal = sum.maxVal; } else { if (sum.minVal < minVal) minVal = sum.minVal; if (sum.maxVal > maxVal) maxVal = sum.maxVal; } sumData += sum.sumData; sumSquares += sum.sumSquares; validCount += sum.validCount; } } } else { double ll, ul; enum wigCompare cmp; getWigFilter(database, curTable, &cmp, &ll, &ul); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(0); struct bbiInterval *iv, *ivList; ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm); for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; double size = iv->end - iv->start; if (validCount == 0) minVal = maxVal = val; else { if (val < minVal) minVal = val; if (val > maxVal) maxVal = val; } sumData += size*val; sumSquares += size*val*val; validCount += size; } lmCleanup(&lm); } } hTableStart(); floatStatRow("mean", sumData/validCount); floatStatRow("min", 
minVal); floatStatRow("max", maxVal); floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount)); numberStatRow("bases with data", validCount); long long regionSize = basesInRegion(regionList,0); long long gapTotal = gapsInRegion(conn, regionList,0); numberStatRow("bases with sequence", regionSize - gapTotal); numberStatRow("bases in region", regionSize); wigFilterStatRow(conn); stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off")); long wigFetchTime = clock1000() - startTime; floatStatRow("load and calc time", 0.001*wigFetchTime); hTableEnd(); bbiFileClose(&bwf); htmlClose(); }