Example #1
0
struct bed *bamGetFilteredBedsOnRegions(struct sqlConnection *conn,
	char *db, char *table, struct region *regionList, struct lm *lm,
	int *retFieldCount)
/* Get list of beds from BAM, in all regions, that pass filtering. */
{
int maxOut = bigFileMaxOutput();
/* Figure out bam file name get column info and filter. */
struct asObject *as = bamAsObj();
struct asFilter *filter = asFilterFromCart(cart, db, table, as);
struct hash *idHash = identifierHash(db, table);

/* Get beds a region at a time. */
struct bed *bedList = NULL;
struct region *region;
for (region = regionList; region != NULL; region = region->next)
    {
    char *fileName = bamFileName(table, conn, region->chrom);
    addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash, &maxOut);
    freeMem(fileName);
    if (maxOut <= 0)
	{
	warn("Reached output limit of %d data values, please make region smaller,\n"
	     "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
	break;
	}
    }
slReverse(&bedList);
return bedList;
}
Example #2
0
struct wigAsciiData *getWiggleAsData(struct sqlConnection *conn, char *table,
	struct region *region)
/*	return the wigAsciiData list	*/
{
int maxOut = 0;
struct wigAsciiData *data = NULL;

maxOut = bigFileMaxOutput();
// ignore return outCount from wigOutRegion:
wigOutRegion(table, conn, region, maxOut, wigDataNoPrint, &data, 0);

return data;
}
Example #3
0
struct bed *getWiggleAsBed(
    char *db, char *table, 	/* Database and table. */
    struct region *region,	/* Region to get data for. */
    char *filter, 		/* Filter to add to SQL where clause if any. */
    struct hash *idHash, 	/* Restrict to id's in this hash if non-NULL. */
    struct lm *lm,		/* Where to allocate memory. */
    struct sqlConnection *conn)	/* SQL connection to work with */
/* Return a bed list of all items in the given range in table.
 * Cleanup result via lmCleanup(&lm) rather than bedFreeList.  */
/* filter, idHash and lm are currently unused, perhaps future use	*/
{
struct bed *bedList=NULL;
char splitTableOrFileName[HDB_MAX_TABLE_STRING];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchBed;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;
int maxOut;

WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */

if (hasConstraint)
    freeMem(dataConstraint);	/* been cloned into wds */

maxOut = bigFileMaxOutput();

wds->setMaxOutput(wds, maxOut);

wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);

if (table2)
    intersectBedList = bedTable2(conn, region, table2);

if (isCustom)
    {
    if (ct->dbTrack)
	{
	unsigned span = 0;
	struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
	struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	valuesMatched = getWigglePossibleIntersection(wds, region,
	    CUSTOM_TRASH, table2, &intersectBedList,
		splitTableOrFileName, operations);
	span = minSpan(trashConn, splitTableOrFileName, region->chrom,
	    region->start, region->end, cart, tdb);
	wds->setSpanConstraint(wds, span);
	hFreeConn(&trashConn);
	}
    else
	valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
	    &intersectBedList, splitTableOrFileName, operations);
    }
else
    {
    if (conn == NULL)
	errAbort( "getWiggleAsBed: NULL conn given for database table");

    if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, sizeof splitTableOrFileName, NULL))
	{
	struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
	unsigned span = 0;

	/* XXX TBD, watch for a span limit coming in as an SQL filter */
	span = minSpan(conn, splitTableOrFileName, region->chrom,
	    region->start, region->end, cart, tdb);
	wds->setSpanConstraint(wds, span);

	valuesMatched = getWigglePossibleIntersection(wds, region, database,
	    table2, &intersectBedList, splitTableOrFileName, operations);
	}
    }

if (valuesMatched > 0)
    {
    struct bed *bed;

    wds->sortResults(wds);
    for (bed = wds->bed; bed != NULL; bed = bed->next)
	{
	struct bed *copy = lmCloneBed(bed, lm);
	slAddHead(&bedList, copy);
	}
    slReverse(&bedList);
    }

wiggleDataStreamFree(&wds);

return bedList;
}	/*	struct bed *getWiggleAsBed()	*/
Example #4
0
static void doOutWig(struct trackDb *track, char *table, struct sqlConnection *conn,
	enum wigOutputType wigOutType)
{
struct region *regionList = getRegions(), *region;
int maxOut = 0, outCount, curOut = 0;
char *shortLabel = table, *longLabel = table;

if (track == NULL)
    errAbort("Sorry, can't find necessary track information for %s.  "
	     "If you reached this page by selecting \"All tables\" as the "
	     "group, please go back and select the same table via a regular "
	     "track group if possible.",
	     table);

maxOut = bigFileMaxOutput();

if (cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE))
    fputs("#", stdout);
else
    textOpen();

if (track != NULL)
    {
    if (!sameString(track->table, table) && track->subtracks != NULL)
	{
	struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(track->subtracks);
	struct slRef *tdbRef;
	for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next)
	    {
	    struct trackDb *tdb = tdbRef->val;
	    if (sameString(tdb->table, table))
		{
		track = tdb;
		break;
		}
	    }
	slFreeList(&tdbRefList);
	}
    shortLabel = track->shortLabel;
    longLabel = track->longLabel;
    }
wigDataHeader(shortLabel, longLabel, NULL, wigOutType);

for (region = regionList; region != NULL; region = region->next)
    {
    int curMaxOut = maxOut - curOut;
    if (anySubtrackMerge(database, table))
	outCount = mergedWigOutRegion(table, conn, region, curMaxOut,
				      wigOutType);
    else if (startsWithWord("bedGraph", track->type))
	outCount = bedGraphOutRegion(table, conn, region, curMaxOut,
				     wigOutType);
    else if (startsWithWord("mathWig", track->type))
        outCount = mathWigOutRegion(track, table, conn, region, curMaxOut, wigOutType);
    else if (startsWithWord("bigWig", track->type))
        outCount = bigWigOutRegion(table, conn, region, curMaxOut, wigOutType);
    else
	outCount = wigOutRegion(table, conn, region, curMaxOut,
				wigOutType, NULL, 0);
    curOut += outCount;
    if (curOut >= maxOut)
        break;
    }
if (curOut >= maxOut)
    errAbort("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", curOut);
}
Example #5
0
void bamTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
/* Print out selected fields from BAM.  If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = bamGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
	idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

/* Output row of labels */
fprintf(f, "#%s", fieldArray[0]);
for (i=1; i<fieldCount; ++i)
    fprintf(f, "\t%s", fieldArray[i]);
fprintf(f, "\n");

struct asObject *as = bamAsObj();
struct asFilter *filter = NULL;

if (anyFilter())
    {
    filter = asFilterFromCart(cart, db, table, as);
    if (filter)
        {
	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	}
    }

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();

int maxOut = bigFileMaxOutput();
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    struct lm *lm = lmInit(0);
    char *fileName = bamFileName(table, conn, region->chrom);
    struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName, region->chrom,
    	region->start, region->end, lm);
    char *row[SAMALIGNMENT_NUM_COLS];
    char numBuf[BAM_NUM_BUF_SIZE];
    for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next)
        {
	samAlignmentToRow(sam, numBuf, row);
	if (asFilterOnRow(filter, row))
	    {
	    /* if we're looking for identifiers, check if this matches */
	    if ((idHash != NULL)&&(hashLookup(idHash, row[idFieldNum]) == NULL))
		continue;

	    int i;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1; i<fieldCount; ++i)
		fprintf(f, "\t%s", row[columnArray[i]]);
	    fprintf(f, "\n");
	    maxOut --;
	    }
	}
    freeMem(fileName);
    lmCleanup(&lm);
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}
Example #6
0
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
	       boolean isTabix)
/* Print out selected fields from VCF.  If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = vcfGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
	idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

// If we are outputting a subset of fields, invalidate the VCF header.
boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
if (!allFields)
    fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");

struct asObject *as = vcfAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    filter = asFilterFromCart(cart, db, table, as);

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
// Include the header, absolutely necessary for VCF parsing.
boolean printedHeader = FALSE;
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    char *fileName = vcfFileName(tdb, conn, table, region->chrom);
    struct vcfFile *vcff;
    if (isTabix)
	vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
				   100, maxOut);
    else
	vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
			      100, maxOut, TRUE);
    if (vcff == NULL)
	noWarnAbort();
    // If we are outputting all fields, but this VCF has no genotype info, omit the
    // genotype columns from output:
    if (allFields && vcff->genotypeCount == 0)
	fieldCount = VCFDATALINE_NUM_COLS - 2;
    if (!printedHeader)
	{
	fprintf(f, "%s", vcff->headerString);
	if (filter)
	    fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	if (!allFields)
	    {
	    fprintf(f, "#%s", fieldArray[0]);
	    for (i=1; i<fieldCount; ++i)
		fprintf(f, "\t%s", fieldArray[i]);
	    fprintf(f, "\n");
	    }
	printedHeader = TRUE;
	}
    char *row[VCFDATALINE_NUM_COLS];
    char numBuf[VCF_NUM_BUF_SIZE];
    for (rec = vcff->records;  rec != NULL && (maxOut > 0);  rec = rec->next)
        {
	vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
	if (asFilterOnRow(filter, row))
	    {
	    /* if we're looking for identifiers, check if this matches */
	    if ((idHash != NULL) && (hashLookup(idHash, row[idFieldNum]) == NULL))
		continue;
	    // All fields output: after asFilter'ing, preserve original VCF chrom
	    if (allFields && !sameString(rec->chrom, region->chrom))
		row[0] = rec->chrom;
	    int i;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1; i<fieldCount; ++i)
		{
		fprintf(f, "\t%s", row[columnArray[i]]);
		}
	    fprintf(f, "\n");
	    maxOut --;
	    }
	}
    vcfFileFree(&vcff);
    freeMem(fileName);
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
dyStringFree(&dyAlt);  dyStringFree(&dyFilter);  dyStringFree(&dyInfo);  dyStringFree(&dyGt);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}