Bits *bitsForIntersectingTable(struct sqlConnection *conn, struct region *region,
                               int chromSize, boolean isBpWise)
/* Build and return a bitmap of the items, within region, of the table we are
 * intersecting with.  The table name comes from the hgtaIntersectTable cart
 * variable and the optional inversion from hgtaInvertTable2.
 *   conn      - database connection handed to the table loaders
 *   region    - region whose items are loaded
 *   chromSize - size used for the bitmap operations
 *   isBpWise  - when FALSE, zero-size items are expanded before being OR-ed in
 * The returned bitmap comes from bitAlloc(); it is not released here. */
{
    boolean complement = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
    char *otherTable = cartString(cart, hgtaIntersectTable);
    struct hTableInfo *otherHti = getHti(database, otherTable, conn);
    struct lm *pool = lmInit(64*1024);   /* passed to the bed loaders below */
    Bits *mask = bitAlloc(chromSize+8);
    struct bed *otherBeds;

    if (isBigWigTable(otherTable))
    {
        otherBeds = bigWigIntervalsToBed(conn, otherTable, region, pool);
    }
    else
    {
        // We should go straight to raw beds here, not through the routines that
        // do filter & intersections, because the secondary table has no filter
        // and sure shouldn't be intersected. :)
        otherBeds = getFilteredBeds(conn, otherTable, region, pool, NULL);
    }

    if (!isBpWise)
    {
        expandZeroSize(otherBeds, otherHti->hasBlocks, chromSize);
    }

    /* Mark every loaded item in the bitmap, then flip it if requested. */
    bedOrBits(mask, chromSize, otherBeds, otherHti->hasBlocks, 0);
    if (complement)
    {
        bitNot(mask, chromSize);
    }

    lmCleanup(&pool);
    return mask;
}
void genomicFormatPage(struct sqlConnection *conn) /* Put up page asking for what sort of genomic sequence. */ { struct hTableInfo *hti = getHti(database, curTable, conn); htmlOpen("%s Genomic Sequence", curTableLabel()); if (doGalaxy()) startGalaxyForm(); else hPrintf("<FORM ACTION=\"%s\" METHOD=GET>\n", getScriptName()); cartSaveSession(cart); hgSeqOptionsHtiCart(hti, cart); hPrintf("<BR>\n"); if (doGalaxy()) { /* pass parameter to get sequence to Galaxy */ cgiMakeHiddenVar(hgtaDoGenomicDna, "get sequence"); printGalaxySubmitButtons(); } else { cgiMakeButton(hgtaDoGenomicDna, "get sequence"); hPrintf(" "); cgiMakeButton(hgtaDoMainPage, "cancel"); hPrintf("</FORM>"); } cgiDown(0.9); htmlClose(); }
struct bed *dbGetFilteredBedsOnRegions(struct sqlConnection *conn, char *db, char *dbVarName, char *table, char *tableVarName, struct region *regionList, struct lm *lm, int *retFieldCount) /* Get list of beds from database, in all regions, that pass filtering. */ { /* A joining query may be required if the filter incorporates linked tables. */ struct hTableInfo *hti = getHti(db, table, conn); struct slName *fieldList = getBedFieldSlNameList(hti, db, table); struct joinerDtf *dtfList = NULL; struct joinerDtf *filterTables = NULL; boolean doJoin = joinRequired(db, table, fieldList, &dtfList, &filterTables); struct region *region; struct bed *bedList = NULL; char *idField = getIdField(db, curTrack, table, hti); struct hash *idHash = identifierHash(db, table); if (! doJoin) { for (region = regionList; region != NULL; region = region->next) { char *identifierFilter = identifierWhereClause(idField, idHash); char *filter = filterClause(dbVarName, tableVarName, region->chrom, identifierFilter); struct bed *bedListRegion = getRegionAsMergedBed(dbVarName, tableVarName, region, filter, idHash, lm, retFieldCount); struct bed *bed, *nextBed; for (bed = bedListRegion; bed != NULL; bed = nextBed) { nextBed = bed->next; slAddHead(&bedList, bed); } freez(&filter); } slReverse(&bedList); } else { struct joiner *joiner = allJoiner; struct joinedTables *joined = joinedTablesCreate(joiner, db, table, dtfList, filterTables, 1000000, regionList); int bedFieldCount = hTableInfoBedFieldCount(hti); if (retFieldCount != NULL) *retFieldCount = bedFieldCount; bedList = joinedTablesToBed(joined, hti, bedFieldCount, lm); joinedTablesFree(&joined); } joinerDtfFreeList(&dtfList); joinerDtfFreeList(&filterTables); hashFree(&idHash); return bedList; }
void doOutGff(char *table, struct sqlConnection *conn, boolean outputGtf)
/* Save as GFF/GTF.
 *   table     - table to output
 *   conn      - database connection; may be NULL, in which case the exonFrames
 *               lookup and the chromosome pre-filter are skipped
 *   outputGtf - passed through to bedToGffLines()
 * Prints NO_RESULTS when no region produced any item. */
{
    struct hTableInfo *hti = getHti(database, table, conn);
    struct bed *bedList;
    struct hash *chromHash = NULL;
    struct slName *exonFramesList = NULL;
    char source[HDB_MAX_TABLE_STRING];
    /* Initialised here rather than inside the if (conn) block, so the counter
     * is well defined when there is no connection. */
    int itemCount = 0;
    struct region *region, *regionList = getRegions();

    textOpen();

    int efIdx = -1;
    safef(source, sizeof(source), "%s_%s", database, table);
    if (conn)
    {
        boolean simpleTableExists = sqlTableExists(conn, table);
        // simpleTable means not split table, not custom track
        // However it still can include bbi table with bam fileName path
        if (simpleTableExists)  // no tables having exonFrames are split tables anyway
            efIdx = sqlFieldIndex(conn, table, "exonFrames");
        int regionCount = slCount(regionList);
        // regionList can have many thousands of items e.g. rheMac3 has 34000 chroms!
        // This regionCount threshold should be just above the # chroms in the latest human assembly
        if (simpleTableExists && (regionCount > 500))
        {
            chromHash = makeChromHashForTable(conn, table);
        }
    }
    // Note: code could be added here to extract exonFrames from bigGenePred

    // Process each region
    for (region = regionList; region != NULL; region = region->next)
    {
        /* When a chrom hash was built, skip regions whose chrom is not in it. */
        if (chromHash && (!hashFindVal(chromHash, region->chrom)))
            continue;
        struct lm *lm = lmInit(64*1024);
        int fieldCount;
        bedList = cookedBedList(conn, table, region, lm, &fieldCount);
        // Use exonFrames field if available for better accuracy instead of calculating from coordinates
        /* NOTE(review): the list returned by getExonFrames() is not released
         * in this function -- confirm ownership. */
        if (efIdx != -1)
            exonFramesList = getExonFrames(table, conn, bedList);
        itemCount += bedToGffLines(bedList, exonFramesList, hti, fieldCount, source, outputGtf);
        lmCleanup(&lm);
    }
    if (itemCount == 0)
        hPrintf(NO_RESULTS);
}
void doGenomicDna(struct sqlConnection *conn) /* Get genomic sequence (UI has already told us how). */ { struct region *region, *regionList = getRegions(); struct hTableInfo *hti = getHti(database, curTable, conn); int fieldCount; textOpen(); int resultCount = 0; for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(64*1024); struct bed *bedList = cookedBedList(conn, curTable, region, lm, &fieldCount); if (bedList != NULL) resultCount += hgSeqBed(database, hti, bedList); lmCleanup(&lm); } if (!resultCount) hPrintf(NO_RESULTS); }
Bits *bitsForIntersectingTable(struct sqlConnection *conn, struct region *region,
                               int chromSize, boolean isBpWise)
/* Get a bitmap that corresponds to the table we are intersecting with.
 * The table (hgtaIntersectTable) and whether to invert the result
 * (hgtaInvertTable2) are read from the cart/CGI variables.
 *   isBpWise - when FALSE, zero-size items are expanded before being OR-ed in
 * The returned bitmap comes from bitAlloc(); it is not released here. */
{
    boolean flipBits = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
    char *secondTable = cartString(cart, hgtaIntersectTable);
    struct hTableInfo *secondHti = getHti(database, secondTable, conn);
    struct lm *scratch = lmInit(64*1024);
    Bits *result = bitAlloc(chromSize+8);
    struct bed *secondBeds = getFilteredBeds(conn, secondTable, region, scratch, NULL);
    boolean blocked = secondHti->hasBlocks;

    if (!isBpWise)
    {
        expandZeroSize(secondBeds, blocked, chromSize);
    }

    bedOrBits(result, chromSize, secondBeds, blocked, 0);

    if (flipBits)
    {
        bitNot(result, chromSize);
    }

    /* The beds were loaded with the scratch pool; the bitmap is separate. */
    lmCleanup(&scratch);
    return result;
}
boolean doGetBedOrCt(struct sqlConnection *conn, boolean doCt, boolean doCtFile,
                     boolean redirectToGb)
/* Actually output bed or custom track.  Return TRUE unless no results.
 *   doCt         - collect results into a new custom track instead of printing
 *   doCtFile     - together with doCt and the cart setting, forces a custom
 *                  track header
 *   redirectToGb - after saving a custom track, put up a page that redirects
 *                  to the genome browser */
{
    char *db = cloneString(database);
    char *table = curTable;
    struct hTableInfo *hti = getHti(db, table, conn);
    struct featureBits *fbList = NULL, *fb;
    struct customTrack *newTrack = NULL;
    boolean wantHeader = (cartUsualBoolean(cart, hgtaPrintCustomTrackHeaders, FALSE)
                          || doCt || doCtFile);
    char *wigOutType = cartCgiUsualString(cart, hgtaCtWigOutType, outWigData);
    char *fbQual = fbOptionsToQualifier();
    char fbTQ[128];
    int fields = hTableInfoBedFieldCount(hti);
    boolean gotResults = FALSE;
    struct region *region, *regionList = getRegions();
    boolean isBedGr = isBedGraph(curTable);
    boolean isBgWg = isBigWigTable(curTable);
    boolean needSubtrackMerge = anySubtrackMerge(database, curTable);
    boolean doDataPoints = FALSE;
    boolean isWig = isWiggle(database, table);
    struct wigAsciiData *wigDataList = NULL;
    struct dataVector *dataVectorList = NULL;
    boolean doRgb = bedItemRgb(hTrackDbForTrack(db, curTable));
    /* Convenience flags derived only from the locals above. */
    boolean isWigLike = (isWig || isBedGr || isBgWg);
    boolean useDataVectors = (needSubtrackMerge || isBedGr || isBgWg);
    boolean noFbQualifier;

    if (!cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE) && !doCt)
    {
        textOpen();
    }

    if (cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE))
    {
        fputs("#", stdout);
    }

    if (isWigLike && sameString(outWigData, wigOutType))
    {
        doDataPoints = TRUE;
    }

    noFbQualifier = ((fbQual == NULL) || (fbQual[0] == 0));

    for (region = regionList; region != NULL; region = region->next)
    {
        struct bed *bedList = NULL, *item;
        struct lm *lm = lmInit(64*1024);
        struct dataVector *dv = NULL;

        /* Step 1: load this region, either as data points or as beds. */
        if (isWig && doDataPoints)
        {
            if (needSubtrackMerge)
            {
                dv = wiggleDataVector(curTrack, curTable, conn, region);
                if (dv != NULL)
                    slAddHead(&dataVectorList, dv);
            }
            else
            {
                struct wigAsciiData *ascii = getWiggleAsData(conn, curTable, region);
                struct wigAsciiData *following;

                /* Keep only the entries that actually hold data. */
                while (ascii != NULL)
                {
                    following = ascii->next;
                    if (ascii->count)
                        slAddHead(&wigDataList, ascii);
                    ascii = following;
                }
                /* NOTE(review): this reverses the whole accumulated list once
                 * per region, not just the entries just added -- confirm the
                 * resulting order is intended for multi-region output. */
                slReverse(&wigDataList);
            }
        }
        else if (isBedGr && doDataPoints)
        {
            dv = bedGraphDataVector(curTable, conn, region);
            if (dv != NULL)
                slAddHead(&dataVectorList, dv);
        }
        else if (isBgWg && doDataPoints)
        {
            dv = bigWigDataVector(curTable, conn, region);
            if (dv != NULL)
                slAddHead(&dataVectorList, dv);
        }
        else if (isWig || isBgWg)
        {
            dv = wiggleDataVector(curTrack, curTable, conn, region);
            bedList = dataVectorToBedList(dv);
            dataVectorFree(&dv);
        }
        else if (isBedGr)
        {
            bedList = getBedGraphAsBed(conn, curTable, region);
        }
        else
        {
            bedList = cookedBedList(conn, curTable, region, lm, &fields);
        }

        /* this is a one-time only initial creation of the custom track
         * structure to receive the results.  gotResults turns it off after
         * the first time. */
        if (wantHeader && !gotResults
            && ((bedList != NULL) || (wigDataList != NULL) || (dataVectorList != NULL)))
        {
            newTrack = beginCustomTrack(table, fields, doCt, isWigLike, doDataPoints);
        }

        /* Step 2: emit or collect what was loaded. */
        if (doDataPoints && (wigDataList || dataVectorList))
        {
            gotResults = TRUE;
        }
        else if (noFbQualifier)
        {
            for (item = bedList; item != NULL; item = item->next)
            {
                if (item->name != NULL)
                {
                    subChar(item->name, ' ', '_');
                }
                if (doCt)
                {
                    struct bed *dupe = cloneBed(item); /* Out of local memory. */
                    slAddHead(&newTrack->bedList, dupe);
                }
                else if (doRgb)
                {
                    bedTabOutNitemRgb(item, fields, stdout);
                }
                else
                {
                    bedTabOutN(item, fields, stdout);
                }
                gotResults = TRUE;
            }
        }
        else
        {
            safef(fbTQ, sizeof(fbTQ), "%s:%s", hti->rootName, fbQual);
            fbList = fbFromBed(db, fbTQ, hti, bedList, 0, 0, FALSE, FALSE);

            /* Feature-bits output only comes in 3, 4 or 6 columns. */
            fields = (fields >= 6) ? 6 : ((fields >= 4) ? 4 : 3);

            if (doCt && newTrack)
            {
                newTrack->fieldCount = fields;
                safef(newTrack->tdb->type, strlen(newTrack->tdb->type)+1, "bed %d", fields);
            }
            for (fb = fbList; fb != NULL; fb = fb->next)
            {
                if (fb->name != NULL)
                {
                    /* Cut the name off at its first space. */
                    char *space = strchr(fb->name, ' ');
                    if (space != NULL)
                        *space = 0;
                }
                if (doCt)
                {
                    struct bed *fbBed = fbToBedOne(fb);
                    slAddHead(&newTrack->bedList, fbBed);
                }
                else if (fields >= 6)
                {
                    hPrintf("%s\t%d\t%d\t%s\t%d\t%c\n",
                            fb->chrom, fb->start, fb->end, fb->name, 0, fb->strand);
                }
                else if (fields >= 4)
                {
                    hPrintf("%s\t%d\t%d\t%s\n",
                            fb->chrom, fb->start, fb->end, fb->name);
                }
                else
                {
                    hPrintf("%s\t%d\t%d\n", fb->chrom, fb->start, fb->end);
                }
                gotResults = TRUE;
            }
            featureBitsFreeList(&fbList);
        }
        bedList = NULL;
        lmCleanup(&lm);
    }

    /* Step 3: finish up once every region has been handled. */
    if (!gotResults)
    {
        hPrintf(NO_RESULTS);
    }
    else if (doCt)
    {
        int wigDataSize = 0;
        /* Load existing custom tracks and add this new one: */
        struct customTrack *ctList = getCustomTracks();
        removeNamedCustom(&ctList, newTrack->tdb->table);
        if (!doDataPoints)
        {
            slReverse(&newTrack->bedList);
        }
        else if (useDataVectors)
        {
            slReverse(&dataVectorList);
            wigDataSize = dataVectorWriteWigAscii(dataVectorList, newTrack->wigAscii, 0, NULL);
            // TODO: see if can make prettier wig output here that
            // doesn't necessarily have one value per base
        }
        else
        {
            /* create an otherwise empty wds so we can print out the list */
            struct wiggleDataStream *wds = wiggleDataStreamNew();
            wds->ascii = wigDataList;
            wigDataSize = wds->asciiOut(wds, db, newTrack->wigAscii, TRUE, FALSE);
#if defined(DEBUG)	/*	dbg	*/
            /* allow file readability for debug */
            chmod(newTrack->wigAscii, 0666);
#endif
            wiggleDataStreamFree(&wds);
        }

        slAddHead(&ctList, newTrack);
        /* Save the custom tracks out to file (overwrite the old file): */
        customTracksSaveCart(db, cart, ctList);

        /* Put up redirect-to-browser page. */
        if (redirectToGb)
        {
            char browserUrl[256];
            char headerText[512];
            int redirDelay = 3;
            safef(browserUrl, sizeof(browserUrl), "%s?%s&db=%s",
                  hgTracksName(), cartSidUrlString(cart), database);
            safef(headerText, sizeof(headerText),
                  "<META HTTP-EQUIV=\"REFRESH\" CONTENT=\"%d;URL=%s\">",
                  redirDelay, browserUrl);
            webStartHeader(cart, database, headerText,
                           "Table Browser: %s %s: %s", hOrganism(database),
                           freezeName, "get custom track");
            if (doDataPoints)
            {
                hPrintf("There are %d data points in custom track. ", wigDataSize);
            }
            else
            {
                hPrintf("There are %d items in custom track. ",
                        slCount(newTrack->bedList));
            }
            hPrintf("You will be automatically redirected to the genome browser in\n"
                    "%d seconds, or you can \n"
                    "<A HREF=\"%s\">click here to continue</A>.\n",
                    redirDelay, browserUrl);
        }
    }
    else if (doDataPoints)
    {
        if (useDataVectors)
        {
            slReverse(&dataVectorList);
            dataVectorWriteWigAscii(dataVectorList, "stdout", 0, NULL);
        }
        else
        {
            /* create an otherwise empty wds so we can print out the list */
            struct wiggleDataStream *wds = wiggleDataStreamNew();
            wds->ascii = wigDataList;
            wds->asciiOut(wds, db, "stdout", TRUE, FALSE);
            wiggleDataStreamFree(&wds);
        }
    }
    return gotResults;
}
void doBedOrCtOptions(char *table, struct sqlConnection *conn, boolean doCt)
/* Put up form to get options on BED or custom track output. */
/* (Taken from hgText.c/doBedCtOptions) */
/*   table - table being output; used in the page title and default description
 *   conn  - database connection used to look up the table info
 *   doCt  - TRUE for the custom track variant of the form, FALSE for plain BED */
{
    char *table2 = NULL;	/* For now... */
    struct hTableInfo *hti = getHti(database, table, conn);
    char buf[256];
    char *setting;

    htmlOpen("Output %s as %s", table, (doCt ? "Custom Track" : "BED"));
    if (doGalaxy())
        startGalaxyForm();
    else if (doGreat())
    {
        verifyGreatAssemblies();
        startGreatForm();
    }
    else
        hPrintf("<FORM ACTION=\"%s\" METHOD=GET>\n", getScriptName());
    cartSaveSession(cart);

    /* Custom track header settings; not shown for GREAT output. */
    if (!doGreat())
    {
        hPrintf("%s\n", "<TABLE><TR><TD>");
        if (doCt)
        {
            /* The opening <B> matches the closing </B> at the end of this cell,
             * as in the non-custom-track branch below. */
            hPrintf("%s\n", "</TD><TD> <B> "
                    "<A HREF=\"../goldenPath/help/customTrack.html\" TARGET=_blank>"
                    "Custom track</A> header: </B>");
        }
        else
        {
            cgiMakeCheckBox(hgtaPrintCustomTrackHeaders,
                            cartCgiUsualBoolean(cart, hgtaPrintCustomTrackHeaders, FALSE));
            hPrintf("%s\n", "</TD><TD> <B> Include "
                    "<A HREF=\"../goldenPath/help/customTrack.html\" TARGET=_blank>"
                    "custom track</A> header: </B>");
        }
        hPrintf("%s\n", "</TD></TR><TR><TD></TD><TD>name=");
        safef(buf, sizeof(buf), "tb_%s", hti->rootName);
        setting = cgiUsualString(hgtaCtName, buf);
        cgiMakeTextVar(hgtaCtName, setting, 16);
        hPrintf("%s\n", "</TD></TR><TR><TD></TD><TD>description=");
        safef(buf, sizeof(buf), "table browser query on %s%s%s",
              table,
              (table2 ? ", " : ""),
              (table2 ? table2 : ""));
        setting = cgiUsualString(hgtaCtDesc, buf);
        cgiMakeTextVar(hgtaCtDesc, setting, 50);
        hPrintf("%s\n", "</TD></TR><TR><TD></TD><TD>visibility=");
        if (isWiggle(database, table) || isBigWigTable(table))
        {
            setting = cartCgiUsualString(cart, hgtaCtVis, ctVisWigMenu[2]);
            cgiMakeDropList(hgtaCtVis, ctVisWigMenu, ctVisWigMenuSize, setting);
        }
        else
        {
            setting = cartCgiUsualString(cart, hgtaCtVis, ctVisMenu[3]);
            cgiMakeDropList(hgtaCtVis, ctVisMenu, ctVisMenuSize, setting);
        }
        hPrintf("%s\n", "</TD></TR><TR><TD></TD><TD>url=");
        setting = cartCgiUsualString(cart, hgtaCtUrl, "");
        cgiMakeTextVar(hgtaCtUrl, setting, 50);
        hPrintf("%s\n", "</TD></TR><TR><TD></TD><TD>");
        hPrintf("%s\n", "</TD></TR></TABLE>");
    }

    if (isWiggle(database, table) || isBedGraph(table) || isBigWigTable(table) )
    {
        /* Wiggle-like tables: choose between position-only BED and data values. */
        hPrintf("<P> <B> Select type of data output: </B> <BR>\n");
        setting = cartCgiUsualString(cart, hgtaCtWigOutType, outWigData);
        cgiMakeRadioButton(hgtaCtWigOutType, outWigBed, sameString(setting, outWigBed));
        hPrintf("BED format (no data value information, only position)<BR>\n");
        cgiMakeRadioButton(hgtaCtWigOutType, outWigData, sameString(setting, outWigData));
        hPrintf("DATA VALUE format (position and real valued data)</P>\n");
    }
    else
    {
        cgiDown(0.9);
        hPrintf("<B> Create one BED record per: </B>\n");
        if ((anyIntersection() && intersectionIsBpWise()) ||
            (anySubtrackMerge(database, table) && subtrackMergeIsBpWise()))
        {
            /* The original table may have blocks/CDS, described in hti, but
             * that info will be lost after base pair-wise operations.  So make
             * a temporary copy of hti with its flags tweaked: */
            struct hTableInfo simplifiedHti = *hti;
            simplifiedHti.hasBlocks = FALSE;
            simplifiedHti.hasCDS = FALSE;
            fbOptionsHtiCart(&simplifiedHti, cart);
        }
        else
            fbOptionsHtiCart(hti, cart);
    }

    /* Submit buttons. */
    if (doCt)
    {
        if (doGalaxy())
        {
            /* send the action parameter with the form as well */
            cgiMakeHiddenVar(hgtaDoGetCustomTrackFile, "get custom track in file");
            printGalaxySubmitButtons();
        }
        else
        {
            cgiMakeButton(hgtaDoGetCustomTrackTb, "get custom track in table browser");
            hPrintf(" ");
            cgiMakeButton(hgtaDoGetCustomTrackFile, "get custom track in file");
            hPrintf("<BR>\n");
            cgiMakeButton(hgtaDoGetCustomTrackGb, "get custom track in genome browser");
        }
    }
    else
    {
        if (doGalaxy())
        {
            cgiMakeHiddenVar(hgtaDoGetBed, "get BED");
            printGalaxySubmitButtons();
        }
        else if (doGreat())
        {
            cgiMakeHiddenVar(hgtaDoGetBed, "get BED");
            printGreatSubmitButtons();
        }
        else
            cgiMakeButton(hgtaDoGetBed, "get BED");
    }
    if (!doGalaxy() && !doGreat())
    {
        hPrintf(" ");
        cgiMakeButton(hgtaDoMainPage, "cancel");
        hPrintf("</FORM>\n");
    }
    cgiDown(0.9);
    htmlClose();
}
void bamTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f) /* Print out selected fields from BAM. If fields is NULL, then print out all fields. */ { struct hTableInfo *hti = NULL; hti = getHti(db, table, conn); struct hash *idHash = NULL; char *idField = getIdField(db, curTrack, table, hti); int idFieldNum = 0; /* if we know what field to use for the identifiers, get the hash of names */ if (idField != NULL) idHash = identifierHash(db, table); if (f == NULL) f = stdout; /* Convert comma separated list of fields to array. */ int fieldCount = chopByChar(fields, ',', NULL, 0); char **fieldArray; AllocArray(fieldArray, fieldCount); chopByChar(fields, ',', fieldArray, fieldCount); /* Get list of all fields in big bed and turn it into a hash of column indexes keyed by * column name. */ struct hash *fieldHash = hashNew(0); struct slName *bb, *bbList = bamGetFields(); int i; for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i) { /* if we know the field for identifiers, save it away */ if ((idField != NULL) && sameString(idField, bb->name)) idFieldNum = i; hashAddInt(fieldHash, bb->name, i); } /* Create an array of column indexes corresponding to the selected field list. 
*/ int *columnArray; AllocArray(columnArray, fieldCount); for (i=0; i<fieldCount; ++i) { columnArray[i] = hashIntVal(fieldHash, fieldArray[i]); } /* Output row of labels */ fprintf(f, "#%s", fieldArray[0]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", fieldArray[i]); fprintf(f, "\n"); struct asObject *as = bamAsObj(); struct asFilter *filter = NULL; if (anyFilter()) { filter = asFilterFromCart(cart, db, table, as); if (filter) { fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList)); } } /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); int maxOut = bigFileMaxOutput(); for (region = regionList; region != NULL && (maxOut > 0); region = region->next) { struct lm *lm = lmInit(0); char *fileName = bamFileName(table, conn, region->chrom); struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName, region->chrom, region->start, region->end, lm); char *row[SAMALIGNMENT_NUM_COLS]; char numBuf[BAM_NUM_BUF_SIZE]; for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next) { samAlignmentToRow(sam, numBuf, row); if (asFilterOnRow(filter, row)) { /* if we're looking for identifiers, check if this matches */ if ((idHash != NULL)&&(hashLookup(idHash, row[idFieldNum]) == NULL)) continue; int i; fprintf(f, "%s", row[columnArray[0]]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", row[columnArray[i]]); fprintf(f, "\n"); maxOut --; } } freeMem(fileName); lmCleanup(&lm); } if (maxOut == 0) warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); /* Clean up and exit. */ hashFree(&fieldHash); freeMem(fieldArray); freeMem(columnArray); }
void doSummaryStatsBed(struct sqlConnection *conn) /* Put up page showing summary stats for track that is in database * or that is bed-format custom. */ { struct bed *bedList = NULL; struct region *regionList = getRegions(), *region; char *regionName = getRegionName(); long long regionSize = 0, gapTotal = 0, realSize = 0; long startTime, midTime, endTime; long loadTime = 0, calcTime = 0, freeTime = 0; struct covStats *itemCovList = NULL, *blockCovList = NULL, *cov; int itemCount = 0; struct hTableInfo *hti = getHti(database, curTable, conn); int minScore = BIGNUM, maxScore = -BIGNUM; long long sumScores = 0; boolean hasBlocks = hti->hasBlocks; boolean hasScore = (hti->scoreField[0] != 0); int fieldCount; htmlOpen("%s (%s) Summary Statistics", curTableLabel(), curTable); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(64*1024); startTime = clock1000(); bedList = cookedBedList(conn, curTable, region, lm, &fieldCount); if (fieldCount < 12) hasBlocks = FALSE; if (fieldCount < 5) hasScore = FALSE; midTime = clock1000(); loadTime += midTime - startTime; if (bedList != NULL) { itemCount += slCount(bedList); regionSize += region->end - region->start; cov = calcSpanOverRegion(region, bedList); slAddHead(&itemCovList, cov); if (hasBlocks) { cov = calcBlocksOverRegion(region, bedList); slAddHead(&blockCovList, cov); } if (hti->scoreField[0] != 0) { struct bed *bed; for (bed = bedList; bed != NULL; bed = bed->next) { int score = bed->score; if (score < minScore) minScore = score; if (score > maxScore) maxScore = score; sumScores += score; } } } endTime = clock1000(); calcTime += endTime - midTime; lmCleanup(&lm); bedList = NULL; freeTime += clock1000() - endTime; } regionSize = basesInRegion(regionList, 0); gapTotal = gapsInRegion(conn, regionList, 0); realSize = regionSize - gapTotal; hTableStart(); startTime = clock1000(); numberStatRow("item count", itemCount); if (itemCount > 0) { cov = covStatsSum(itemCovList); 
percentStatRow("item bases", cov->basesCovered, realSize); percentStatRow("item total", cov->sumBases, realSize); numberStatRow("smallest item", cov->minBases); numberStatRow("average item", round((double)cov->sumBases/cov->itemCount)); numberStatRow("biggest item", cov->maxBases); } if (hasBlocks && itemCount > 0) { cov = covStatsSum(blockCovList); hPrintf("<TR><TD>block count</TD><TD ALIGN=RIGHT>"); printLongWithCommas(stdout, cov->itemCount); hPrintf("</TD></TR>\n"); percentStatRow("block bases", cov->basesCovered, realSize); percentStatRow("block total", cov->sumBases, realSize); numberStatRow("smallest block", cov->minBases); numberStatRow("average block", round((double)cov->sumBases/cov->itemCount)); numberStatRow("biggest block", cov->maxBases); } if (hasScore != 0 && itemCount > 0 && sumScores != 0) { numberStatRow("smallest score", minScore); numberStatRow("average score", round((double)sumScores/itemCount)); numberStatRow("biggest score", maxScore); } hTableEnd(); /* Show region and time stats part of stats page. */ webNewSection("Region and Timing Statistics"); hTableStart(); stringStatRow("region", regionName); numberStatRow("bases in region", regionSize); numberStatRow("bases in gaps", gapTotal); floatStatRow("load time", 0.001*loadTime); floatStatRow("calculation time", 0.001*calcTime); floatStatRow("free memory time", 0.001*freeTime); stringStatRow("filter", (anyFilter() ? "on" : "off")); stringStatRow("intersection", (anyIntersection() ? "on" : "off")); hTableEnd(); covStatsFreeList(&itemCovList); covStatsFreeList(&blockCovList); htmlClose(); }
static struct bed *intersectOnRegion( struct sqlConnection *conn, /* Open connection to database. */ struct region *region, /* Region to work inside */ char *table1, /* Table input list is from. */ struct bed *bedList1, /* List before intersection, should be * all within region. */ struct lm *lm, /* Local memory pool. */ int *retFieldCount) /* Field count. */ /* Intersect bed list, consulting CGI vars to figure out * with what table and how. Return intersected result, * which is independent from input. This potentially will * chew up bedList1. */ { /* Grab parameters for intersection from cart. */ double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0); double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100); boolean invTable = cartCgiUsualBoolean(cart, hgtaInvertTable, FALSE); char *op = intersectOp(); /* --- TODO MIKE - replace bedList2, bits2 with baseMask stuff. */ /* Load up intersecting bedList2 (to intersect with) */ int chromSize = hChromSize(database, region->chrom); boolean isBpWise = (sameString("and", op) || sameString("or", op)); Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise); /* Set up some other local vars. */ struct hTableInfo *hti1 = getHti(database, table1, conn); struct bed *intersectedBedList = NULL; /* Produce intersectedBedList. */ if (isBpWise) { /* --- TODO MIKE - replace, bits1 with baseMask stuff. 
*/ /* Base-pair-wise operation: get bitmap for primary table too */ Bits *bits1 = bitAlloc(chromSize+8); boolean hasBlocks = hti1->hasBlocks; if (retFieldCount != NULL && (*retFieldCount < 12)) hasBlocks = FALSE; bedOrBits(bits1, chromSize, bedList1, hasBlocks, 0); /* invert inputs if necessary */ if (invTable) bitNot(bits1, chromSize); /* do the intersection/union */ if (sameString("and", op)) bitAnd(bits1, bits2, chromSize); else bitOr(bits1, bits2, chromSize); /* clip to region if necessary: */ if (region->start > 0) bitClearRange(bits1, 0, region->start); if (region->end < chromSize) bitClearRange(bits1, region->end, (chromSize - region->end)); /* translate back to bed */ intersectedBedList = bitsToBed4List(bits1, chromSize, region->chrom, 1, region->start, region->end, lm); if (retFieldCount != NULL) *retFieldCount = 4; bitFree(&bits1); } else intersectedBedList = filterBedByOverlap(bedList1, hti1->hasBlocks, op, moreThresh, lessThresh, bits2, chromSize); bitFree(&bits2); return intersectedBedList; }
void showMainControlTable(struct sqlConnection *conn) /* Put up table with main controls for main page. */ { struct grp *selGroup; boolean isWig = FALSE, isPositional = FALSE, isMaf = FALSE, isBedGr = FALSE, isChromGraphCt = FALSE, isPal = FALSE, isArray = FALSE, isBam = FALSE, isVcf = FALSE, isHalSnake = FALSE, isLongTabix = FALSE; boolean gotClade = hGotClade(); struct hTableInfo *hti = NULL; hPrintf("<TABLE BORDER=0>\n"); /* Print clade, genome and assembly line. */ { if (gotClade) { hPrintf("<TR><TD><B>clade:</B>\n"); printCladeListHtml(hGenome(database), onChangeClade()); nbSpaces(3); hPrintf("<B>genome:</B>\n"); printGenomeListForCladeHtml(database, onChangeOrg()); } else { hPrintf("<TR><TD><B>genome:</B>\n"); printGenomeListHtml(database, onChangeOrg()); } nbSpaces(3); hPrintf("<B>assembly:</B>\n"); printAssemblyListHtml(database, onChangeDb()); hPrintf("</TD></TR>\n"); } /* Print group and track line. */ { hPrintf("<TR><TD>"); selGroup = showGroupField(hgtaGroup, onChangeGroupOrTrack(), conn, hAllowAllTables()); nbSpaces(3); curTrack = showTrackField(selGroup, hgtaTrack, onChangeGroupOrTrack(), FALSE); nbSpaces(3); boolean hasCustomTracks = FALSE; struct trackDb *t; for (t = fullTrackList; t != NULL; t = t->next) { if (isCustomTrack(t->table)) { hasCustomTracks = TRUE; break; } } hOnClickButton("document.customTrackForm.submit();return false;", hasCustomTracks ? CT_MANAGE_BUTTON_LABEL : CT_ADD_BUTTON_LABEL); hPrintf(" "); if (hubConnectTableExists()) hOnClickButton("document.trackHubForm.submit();return false;", "track hubs"); hPrintf("</TD></TR>\n"); } /* Print table line. 
*/ { hPrintf("<TR><TD>"); curTable = showTableField(curTrack, hgtaTable, TRUE); if (isHubTrack(curTable) || (strchr(curTable, '.') == NULL)) /* In same database */ { hti = getHti(database, curTable, conn); isPositional = htiIsPositional(hti); } isLongTabix = isLongTabixTable( curTable); isBam = isBamTable( curTable); isVcf = isVcfTable(curTable, NULL); isWig = isWiggle(database, curTable); if (isBigWigTable(curTable)) { isPositional = TRUE; isWig = TRUE; } isHalSnake = isHalTable( curTable); isMaf = isMafTable(database, curTrack, curTable); isBedGr = isBedGraph(curTable); isArray = isMicroarray(curTrack, curTable); struct trackDb *tdb = findTdbForTable(database, curTrack, curTable, ctLookupName); isPal = isPalCompatible(conn, tdb, curTable); nbSpaces(1); if (isCustomTrack(curTable)) { isChromGraphCt = isChromGraph(tdb); } cgiMakeButton(hgtaDoSchema, "describe table schema"); hPrintf("</TD></TR>\n"); } if (curTrack == NULL) { struct trackDb *tdb = hTrackDbForTrack(database, curTable); struct trackDb *cTdb = hCompositeTrackDbForSubtrack(database, tdb); if (cTdb) curTrack = cTdb; else curTrack = tdb; isMaf = isMafTable(database, curTrack, curTable); } /* Region line */ { char *regionType = cartUsualString(cart, hgtaRegionType, hgtaRegionTypeGenome); char *range = cartUsualString(cart, hgtaRange, ""); if (isPositional) { boolean doEncode = FALSE; if (!trackHubDatabase(database)) doEncode = sqlTableExists(conn, "encodeRegions"); hPrintf("<TR><TD><B>region:</B>\n"); /* If regionType not allowed force it to "genome". */ if ((sameString(regionType, hgtaRegionTypeUserRegions) && userRegionsFileName() == NULL) || (sameString(regionType, hgtaRegionTypeEncode) && !doEncode)) regionType = hgtaRegionTypeGenome; // Is "genome" is not allowed because of tdb 'tableBrowser noGenome'? 
boolean disableGenome = ((curTrack && cartTrackDbIsNoGenome(database, curTrack->table)) || (curTable && cartTrackDbIsNoGenome(database, curTable))); // If "genome" is selected but not allowed, force it to "range": if (sameString(regionType, hgtaRegionTypeGenome) && disableGenome) regionType = hgtaRegionTypeRange; jsTrackingVar("regionType", regionType); if (disableGenome) { makeRegionButtonExtraHtml(hgtaRegionTypeGenome, regionType, "DISABLED"); hPrintf(" <span"NO_GENOME_CLASS">genome (unavailable for selected track)</span>" " "); } else { makeRegionButton(hgtaRegionTypeGenome, regionType); hPrintf(" genome "); } if (doEncode) { makeRegionButton(hgtaRegionTypeEncode, regionType); hPrintf(" ENCODE Pilot regions "); } makeRegionButton(hgtaRegionTypeRange, regionType); hPrintf(" position "); hPrintf("<INPUT TYPE=TEXT NAME=\"%s\" SIZE=26 VALUE=\"%s\" onFocus=\"%s\">\n", hgtaRange, range, jsRadioUpdate(hgtaRegionType, "regionType", "range")); cgiMakeButton(hgtaDoLookupPosition, "lookup"); hPrintf(" "); if (userRegionsFileName() != NULL) { makeRegionButton(hgtaRegionTypeUserRegions, regionType); hPrintf(" defined regions "); cgiMakeButton(hgtaDoSetUserRegions, "change"); hPrintf(" "); cgiMakeButton(hgtaDoClearUserRegions, "clear"); } else cgiMakeButton(hgtaDoSetUserRegions, "define regions"); hPrintf("</TD></TR>\n"); } else { /* Need to put at least stubs of cgi variables in for JavaScript to work. */ jsTrackingVar("regionType", regionType); cgiMakeHiddenVar(hgtaRange, range); cgiMakeHiddenVar(hgtaRegionType, regionType); } /* Select identifiers line (if applicable). 
*/ if (!isWig && getIdField(database, curTrack, curTable, hti) != NULL) { hPrintf("<TR><TD><B>identifiers (names/accessions):</B>\n"); cgiMakeButton(hgtaDoPasteIdentifiers, "paste list"); hPrintf(" "); cgiMakeButton(hgtaDoUploadIdentifiers, "upload list"); if (identifierFileName() != NULL) { hPrintf(" "); cgiMakeButton(hgtaDoClearIdentifiers, "clear list"); } hPrintf("</TD></TR>\n"); } } /* microarray options */ /* button for option page here (median/log-ratio, etc) */ /* Filter line. */ { hPrintf("<TR><TD><B>filter:</B>\n"); if (anyFilter()) { cgiMakeButton(hgtaDoFilterPage, "edit"); hPrintf(" "); cgiMakeButton(hgtaDoClearFilter, "clear"); if (isWig || isBedGr) wigShowFilter(conn); } else { cgiMakeButton(hgtaDoFilterPage, "create"); } hPrintf("</TD></TR>\n"); } /* Composite track subtrack merge line. */ boolean canSubtrackMerge = (curTrack && tdbIsComposite(curTrack) && !isBam && !isVcf && !isLongTabix); if (canSubtrackMerge) { hPrintf("<TR><TD><B>subtrack merge:</B>\n"); if (anySubtrackMerge(database, curTable)) { cgiMakeButton(hgtaDoSubtrackMergePage, "edit"); hPrintf(" "); cgiMakeButton(hgtaDoClearSubtrackMerge, "clear"); } else { cgiMakeButton(hgtaDoSubtrackMergePage, "create"); } hPrintf("</TD></TR>\n"); } /* Intersection line. */ if (isPositional) { if (anyIntersection()) { hPrintf("<TR><TD><B>intersection with %s:</B>\n", cartString(cart, hgtaIntersectTable)); cgiMakeButton(hgtaDoIntersectPage, "edit"); hPrintf(" "); cgiMakeButton(hgtaDoClearIntersect, "clear"); hPrintf("</TD></TR>\n"); } else if (canIntersect(database, curTable)) { hPrintf("<TR><TD><B>intersection:</B>\n"); cgiMakeButton(hgtaDoIntersectPage, "create"); hPrintf("</TD></TR>\n"); } } /* Correlation line. 
*/ struct trackDb *tdb = findTdbForTable(database, curTrack, curTable, ctLookupName); if (correlateTrackTableOK(tdb, curTable)) { char *table2 = cartUsualString(cart, hgtaCorrelateTable, "none"); hPrintf("<TR><TD><B>correlation:</B>\n"); if (differentWord(table2, "none") && strlen(table2) && ! isNoGenomeDisabled(database, table2)) { struct grp *groupList = fullGroupList; struct grp *selGroup = findSelectedGroup(groupList, hgtaCorrelateGroup); struct trackDb *tdb2 = findSelectedTrack(fullTrackList, selGroup,hgtaCorrelateTrack); if (tdbIsComposite(tdb2)) { struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(tdb2->subtracks); struct slRef *tdbRef; for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next) { struct trackDb *subTdb = tdbRef->val; if (sameString(table2, subTdb->table)) { tdb2 = subTdb; break; } } slFreeList(&tdbRefList); } cgiMakeButton(hgtaDoCorrelatePage, "calculate"); cgiMakeButton(hgtaDoClearCorrelate, "clear"); if (tdb2 && tdb2->shortLabel) hPrintf(" (with: %s)", tdb2->shortLabel); #ifdef NOT_YET /* debugging dbg vvvvv */ if (curTrack && curTrack->type) /* dbg */ { hPrintf("<BR> (debug: '%s', '%s(%s)')", curTrack->type, tdb2->type, table2); } /* debugging debug ^^^^^ */ #endif } else cgiMakeButton(hgtaDoCorrelatePage, "create"); hPrintf("</TD></TR>\n"); } /* Print output type line. */ showOutputTypeRow(isWig, isBedGr, isPositional, isMaf, isChromGraphCt, isPal, isArray, isHalSnake); /* Print output destination line. 
*/ { char *compressType = cartUsualString(cart, hgtaCompressType, textOutCompressNone); char *fileName = cartUsualString(cart, hgtaOutFileName, ""); hPrintf("<TR><TD>\n"); hPrintf("<B>output file:</B> "); cgiMakeTextVar(hgtaOutFileName, fileName, 29); hPrintf(" (leave blank to keep output in browser)</TD></TR>\n"); hPrintf("<TR><TD>\n"); hPrintf("<B>file type returned: </B>"); cgiMakeRadioButton(hgtaCompressType, textOutCompressNone, sameWord(textOutCompressNone, compressType)); hPrintf(" plain text  "); cgiMakeRadioButton(hgtaCompressType, textOutCompressGzip, sameWord(textOutCompressGzip, compressType)); hPrintf(" gzip compressed"); hPrintf("</TD></TR>\n"); } hPrintf("</TABLE>\n"); /* Submit buttons. */ { hPrintf("<BR>\n"); if (isWig || isBam || isVcf || isLongTabix) { char *name; extern char *maxOutMenu[]; char *maxOutput = maxOutMenu[0]; if (isCustomTrack(curTable)) name=filterFieldVarName("ct", curTable, "_", filterMaxOutputVar); else name=filterFieldVarName(database,curTable, "_",filterMaxOutputVar); maxOutput = cartUsualString(cart, name, maxOutMenu[0]); if (isWig) hPrintf( "<I>Note: to return more than %s lines, change the filter setting" " (above). The entire data set may be available for download as" " a very large file that contains the original data values (not" " compressed into the wiggle format) -- see the Downloads page." "</I><BR>", maxOutput); else if (isBam || isVcf || isLongTabix) hPrintf( "<I>Note: to return more than %s lines, change the filter setting" " (above). Please consider downloading the entire data from our Download pages." "</I><BR>", maxOutput); } else if (anySubtrackMerge(database, curTable) || anyIntersection()) { hPrintf("<I>Note: The all fields and selected fields output formats " "are not available when a%s has been specified.</I><BR>", canSubtrackMerge ? 
" subtrack merge or intersection" : "n intersection"); } cgiMakeButton(hgtaDoTopSubmit, "get output"); hPrintf(" "); if (isPositional || isWig) { cgiMakeButton(hgtaDoSummaryStats, "summary/statistics"); hPrintf(" "); } #ifdef SOMETIMES hPrintf(" "); cgiMakeButton(hgtaDoTest, "test"); #endif /* SOMETIMES */ } hPrintf("<P>" "To reset <B>all</B> user cart settings (including custom tracks), \n" "<A HREF=\"/cgi-bin/cartReset?destination=%s\">click here</A>.\n", getScriptName()); }
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
	       boolean isTabix)
/* Print out selected fields from VCF as tab-separated lines.
 *   db, table - database and table whose VCF file(s) are read (file name is looked up
 *               per region chromosome via vcfFileName).
 *   conn      - open connection passed through to getHti and vcfFileName.
 *   fields    - comma-separated list of column names to output, in output order.
 *   f         - destination stream; stdout is used when f is NULL.
 *   isTabix   - if TRUE open with vcfTabixFileMayOpen, otherwise with vcfFileMayOpen.
 * Output stops once bigFileMaxOutput() lines have been written, and a warning is issued.
 * Aborts (noWarnAbort) if a VCF file cannot be opened.
 * NOTE(review): this comment used to say "If fields is NULL, then print out all fields",
 * but fields is handed directly to chopByChar with no NULL handling here -- confirm that
 * callers never pass NULL before relying on that. */
{
struct hTableInfo *hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* If we know what field to use for the identifiers, get the hash of names. */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all VCF fields and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = vcfGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* If we know the field for identifiers, remember which column it is. */
    if ((idField != NULL) && sameString(idField, bb->name))
	idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);

// If we are outputting a subset of fields, invalidate the VCF header.
boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
if (!allFields)
    fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");

struct asObject *as = vcfAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    filter = asFilterFromCart(cart, db, table, as);

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
// Include the header, absolutely necessary for VCF parsing.
boolean printedHeader = FALSE;
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    char *fileName = vcfFileName(tdb, conn, table, region->chrom);
    struct vcfFile *vcff;
    if (isTabix)
	vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
				   100, maxOut);
    else
	vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
			      100, maxOut, TRUE);
    if (vcff == NULL)
	noWarnAbort();
    // If we are outputting all fields, but this VCF has no genotype info, omit the
    // genotype columns from output.  (The reduced count stays in effect for any
    // later regions as well.)
    if (allFields && vcff->genotypeCount == 0)
	fieldCount = VCFDATALINE_NUM_COLS - 2;
    if (!printedHeader)
	{
	fprintf(f, "%s", vcff->headerString);
	if (filter)
	    fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	if (!allFields)
	    {
	    /* Selected-columns output gets its own '#'-prefixed column name line. */
	    fprintf(f, "#%s", fieldArray[0]);
	    for (i=1;  i<fieldCount;  ++i)
		fprintf(f, "\t%s", fieldArray[i]);
	    fprintf(f, "\n");
	    }
	printedHeader = TRUE;
	}
    char *row[VCFDATALINE_NUM_COLS];
    char numBuf[VCF_NUM_BUF_SIZE];
    for (rec = vcff->records;  rec != NULL && (maxOut > 0);  rec = rec->next)
	{
	vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
	if (asFilterOnRow(filter, row))
	    {
	    /* If we're looking for identifiers, check if this matches. */
	    if ((idHash != NULL) && (hashLookup(idHash, row[idFieldNum]) == NULL))
		continue;
	    // All fields output: after asFilter'ing, preserve original VCF chrom
	    if (allFields && !sameString(rec->chrom, region->chrom))
		row[0] = rec->chrom;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1;  i<fieldCount;  ++i)
		fprintf(f, "\t%s", row[columnArray[i]]);
	    fprintf(f, "\n");
	    maxOut--;
	    }
	}
    vcfFileFree(&vcff);
    freeMem(fileName);
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
dyStringFree(&dyAlt);
dyStringFree(&dyFilter);
dyStringFree(&dyInfo);
dyStringFree(&dyGt);
hashFree(&fieldHash);
/* Release the identifier hash too (it may be NULL), as other users of identifierHash
 * in this file do. */
hashFree(&idHash);
freeMem(fieldArray);
freeMem(columnArray);
}