struct bed *bamGetFilteredBedsOnRegions(struct sqlConnection *conn,
    char *db, char *table, struct region *regionList, struct lm *lm,
    int *retFieldCount)
/* Get list of beds from BAM, in all regions, that pass filtering. */
{
int maxOut = bigFileMaxOutput();
/* Figure out bam file name, get column info and filter. */
struct asObject *as = bamAsObj();
struct asFilter *filter = asFilterFromCart(cart, db, table, as);
struct hash *idHash = identifierHash(db, table);

/* Get beds a region at a time. */
struct bed *bedList = NULL;
struct region *region;
for (region = regionList; region != NULL; region = region->next)
    {
    char *fileName = bamFileName(table, conn, region->chrom);
    addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash, &maxOut);
    freeMem(fileName);
    if (maxOut <= 0)
        {
        warn("Reached output limit of %d data values, please make region smaller,\n"
             "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
        break;
        }
    }
slReverse(&bedList);
return bedList;
}
struct wigAsciiData *getWiggleAsData(struct sqlConnection *conn, char *table,
    struct region *region)
/* return the wigAsciiData list */
{
int maxOut = 0;
struct wigAsciiData *data = NULL;

maxOut = bigFileMaxOutput();
// ignore return outCount from wigOutRegion:
wigOutRegion(table, conn, region, maxOut, wigDataNoPrint, &data, 0);
return data;
}
struct bed *getWiggleAsBed(
    char *db, char *table,          /* Database and table. */
    struct region *region,          /* Region to get data for. */
    char *filter,                   /* Filter to add to SQL where clause if any. */
    struct hash *idHash,            /* Restrict to id's in this hash if non-NULL. */
    struct lm *lm,                  /* Where to allocate memory. */
    struct sqlConnection *conn)     /* SQL connection to work with */
/* Return a bed list of all items in the given range in table.
 * Cleanup result via lmCleanup(&lm) rather than bedFreeList. */
/* filter, idHash and lm are currently unused, perhaps future use */
{
struct bed *bedList = NULL;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchBed;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;
int maxOut;

WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

if (hasConstraint)
    freeMem(dataConstraint);    /* been cloned into wds */

maxOut = bigFileMaxOutput();
wds->setMaxOutput(wds, maxOut);
wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);

if (table2)
    intersectBedList = bedTable2(conn, region, table2);

if (isCustom)
    {
    if (ct->dbTrack)
        {
        unsigned span = 0;
        struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
        struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
        valuesMatched = getWigglePossibleIntersection(wds, region, CUSTOM_TRASH,
            table2, &intersectBedList, splitTableOrFileName, operations);
        span = minSpan(trashConn, splitTableOrFileName, region->chrom,
            region->start, region->end, cart, tdb);
        wds->setSpanConstraint(wds, span);
        hFreeConn(&trashConn);
        }
    else
        valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
            &intersectBedList, splitTableOrFileName, operations);
    }
else
    {
    if (conn == NULL)
        errAbort("getWiggleAsBed: NULL conn given for database table");

    if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName,
                        sizeof splitTableOrFileName, NULL))
        {
        struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
        unsigned span = 0;

        /* XXX TBD, watch for a span limit coming in as an SQL filter */
        span = minSpan(conn, splitTableOrFileName, region->chrom,
            region->start, region->end, cart, tdb);
        wds->setSpanConstraint(wds, span);

        valuesMatched = getWigglePossibleIntersection(wds, region, database,
            table2, &intersectBedList, splitTableOrFileName, operations);
        }
    }

if (valuesMatched > 0)
    {
    struct bed *bed;

    wds->sortResults(wds);
    for (bed = wds->bed; bed != NULL; bed = bed->next)
        {
        struct bed *copy = lmCloneBed(bed, lm);
        slAddHead(&bedList, copy);
        }
    slReverse(&bedList);
    }

wiggleDataStreamFree(&wds);

return bedList;
}   /* struct bed *getWiggleAsBed() */
static void doOutWig(struct trackDb *track, char *table, struct sqlConnection *conn,
                     enum wigOutputType wigOutType)
{
struct region *regionList = getRegions(), *region;
int maxOut = 0, outCount, curOut = 0;
char *shortLabel = table, *longLabel = table;

if (track == NULL)
    errAbort("Sorry, can't find necessary track information for %s. "
             "If you reached this page by selecting \"All tables\" as the "
             "group, please go back and select the same table via a regular "
             "track group if possible.", table);

maxOut = bigFileMaxOutput();

if (cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE))
    fputs("#", stdout);
else
    textOpen();

if (track != NULL)
    {
    if (!sameString(track->table, table) && track->subtracks != NULL)
        {
        struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(track->subtracks);
        struct slRef *tdbRef;
        for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next)
            {
            struct trackDb *tdb = tdbRef->val;
            if (sameString(tdb->table, table))
                {
                track = tdb;
                break;
                }
            }
        slFreeList(&tdbRefList);
        }
    shortLabel = track->shortLabel;
    longLabel = track->longLabel;
    }
wigDataHeader(shortLabel, longLabel, NULL, wigOutType);

for (region = regionList; region != NULL; region = region->next)
    {
    int curMaxOut = maxOut - curOut;
    if (anySubtrackMerge(database, table))
        outCount = mergedWigOutRegion(table, conn, region, curMaxOut, wigOutType);
    else if (startsWithWord("bedGraph", track->type))
        outCount = bedGraphOutRegion(table, conn, region, curMaxOut, wigOutType);
    else if (startsWithWord("mathWig", track->type))
        outCount = mathWigOutRegion(track, table, conn, region, curMaxOut, wigOutType);
    else if (startsWithWord("bigWig", track->type))
        outCount = bigWigOutRegion(table, conn, region, curMaxOut, wigOutType);
    else
        outCount = wigOutRegion(table, conn, region, curMaxOut, wigOutType, NULL, 0);
    curOut += outCount;
    if (curOut >= maxOut)
        break;
    }
if (curOut >= maxOut)
    errAbort("Reached output limit of %d data values, please make region smaller,\n"
             "\tor set a higher output line limit with the filter settings.", curOut);
}
void bamTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
/* Print out selected fields from BAM. If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in BAM and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = bamGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
        idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

/* Output row of labels */
fprintf(f, "#%s", fieldArray[0]);
for (i=1; i<fieldCount; ++i)
    fprintf(f, "\t%s", fieldArray[i]);
fprintf(f, "\n");

struct asObject *as = bamAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    {
    filter = asFilterFromCart(cart, db, table, as);
    if (filter)
        {
        fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
        }
    }

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    struct lm *lm = lmInit(0);
    char *fileName = bamFileName(table, conn, region->chrom);
    struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName,
        region->chrom, region->start, region->end, lm);
    char *row[SAMALIGNMENT_NUM_COLS];
    char numBuf[BAM_NUM_BUF_SIZE];
    for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next)
        {
        samAlignmentToRow(sam, numBuf, row);
        if (asFilterOnRow(filter, row))
            {
            /* if we're looking for identifiers, check if this matches */
            if ((idHash != NULL) && (hashLookup(idHash, row[idFieldNum]) == NULL))
                continue;
            int i;
            fprintf(f, "%s", row[columnArray[0]]);
            for (i=1; i<fieldCount; ++i)
                fprintf(f, "\t%s", row[columnArray[i]]);
            fprintf(f, "\n");
            maxOut --;
            }
        }
    freeMem(fileName);
    lmCleanup(&lm);
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n"
         "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
               boolean isTabix)
/* Print out selected fields from VCF. If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in VCF and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = vcfGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
        idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

// If we are outputting only a subset of fields, note that the output is not valid VCF.
boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
if (!allFields)
    fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");

struct asObject *as = vcfAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    filter = asFilterFromCart(cart, db, table, as);

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
// Include the header, absolutely necessary for VCF parsing.
boolean printedHeader = FALSE;
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    char *fileName = vcfFileName(tdb, conn, table, region->chrom);
    struct vcfFile *vcff;
    if (isTabix)
        vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
                                   100, maxOut);
    else
        vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
                              100, maxOut, TRUE);
    if (vcff == NULL)
        noWarnAbort();
    // If we are outputting all fields, but this VCF has no genotype info, omit the
    // genotype columns from output:
    if (allFields && vcff->genotypeCount == 0)
        fieldCount = VCFDATALINE_NUM_COLS - 2;
    if (!printedHeader)
        {
        fprintf(f, "%s", vcff->headerString);
        if (filter)
            fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
        if (!allFields)
            {
            fprintf(f, "#%s", fieldArray[0]);
            for (i=1; i<fieldCount; ++i)
                fprintf(f, "\t%s", fieldArray[i]);
            fprintf(f, "\n");
            }
        printedHeader = TRUE;
        }
    char *row[VCFDATALINE_NUM_COLS];
    char numBuf[VCF_NUM_BUF_SIZE];
    for (rec = vcff->records; rec != NULL && (maxOut > 0); rec = rec->next)
        {
        vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
        if (asFilterOnRow(filter, row))
            {
            /* if we're looking for identifiers, check if this matches */
            if ((idHash != NULL) && (hashLookup(idHash, row[idFieldNum]) == NULL))
                continue;
            // All fields output: after asFilter'ing, preserve original VCF chrom
            if (allFields && !sameString(rec->chrom, region->chrom))
                row[0] = rec->chrom;
            int i;
            fprintf(f, "%s", row[columnArray[0]]);
            for (i=1; i<fieldCount; ++i)
                {
                fprintf(f, "\t%s", row[columnArray[i]]);
                }
            fprintf(f, "\n");
            maxOut --;
            }
        }
    vcfFileFree(&vcff);
    freeMem(fileName);
    }
if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n"
         "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
dyStringFree(&dyAlt);
dyStringFree(&dyFilter);
dyStringFree(&dyInfo);
dyStringFree(&dyGt);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}