Beispiel #1
0
struct annoStreamer *annoStreamVcfNew(char *fileOrUrl, boolean isTabix, struct annoAssembly *aa,
				      int maxRecords)
/* Construct a VCF-backed annoStreamer (subclass) object.  fileOrUrl is opened
 * with the tabix reader when isTabix is set and with the plain VCF reader
 * otherwise; errAbort if it can't be opened.  maxRecords is stored on the new
 * object.  Returns the new object cast to its annoStreamer base. */
{
// A maxErr of -1 means: don't errAbort on VCF format warnings/errs.
const int maxErr = -1;
struct vcfFile *vcff = isTabix
    ? vcfTabixFileMayOpen(fileOrUrl, NULL, 0, 0, maxErr, 0)
    : vcfFileMayOpen(fileOrUrl, maxErr, 0, FALSE);
if (vcff == NULL)
    errAbort("annoStreamVcfNew: unable to open VCF: '%s'", fileOrUrl);

struct annoStreamVcf *self;
AllocVar(self);

// Set up the generic streamer part and plug in the VCF-specific methods.
struct annoStreamer *streamer = &(self->streamer);
struct asObject *asObj = vcfAsObj();
annoStreamerInit(streamer, aa, asObj, fileOrUrl);
streamer->rowType = arWords;
streamer->setRegion = asvSetRegion;
streamer->getHeader = asvGetHeader;
streamer->nextRow = asvNextRow;
streamer->close = asvClose;

// VCF-specific state.
self->vcff = vcff;
self->isTabix = isTabix;
self->maxRecords = maxRecords;
self->dyGt = dyStringNew(1024);
self->chromNameHash = hashNew(0);
self->numCols = slCount(asObj->columnList);
// 8 file columns without genotypes; with genotypes, 9 plus one per genotype
// (presumably the extra column is VCF's FORMAT column -- confirm against vcf.h).
self->numFileCols = (vcff->genotypeCount > 0) ? 9 + vcff->genotypeCount : 8;
return (struct annoStreamer *)self;
}
struct annoStreamer *annoStreamWigDbNew(char *db, char *table, struct annoAssembly *aa,
					int maxOutput)
/* Construct a wiggle annoStreamer (subclass) object for database table db.table.
 * Rows are fetched through an internal annoStreamDb created here with the
 * wiggle autoSql; the streamer itself is described by annoRowWigAsText and
 * takes its name from that internal streamer. */
{
struct annoStreamWig *self = NULL;
AllocVar(self);
// Internal source of wiggle table rows.
self->wigStr = annoStreamDbNew(db, table, aa, asParseText(wiggleAsText), maxOutput);
struct annoStreamer *base = &(self->streamer);
annoStreamerInit(base, aa, asParseText(annoRowWigAsText), self->wigStr->name);
base->close = aswClose;
base->nextRow = aswNextRow;
base->setRegion = aswSetRegion;
base->rowType = arWigVec;
return (struct annoStreamer *)self;
}
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa)
/* Construct a bigWig annoStreamer (subclass) object for the given file or URL.
 * The streamer's rows are described by annoRowBigWigAsText and have type arWig. */
{
struct bbiFile *bbi = bigWigFileOpen(fileOrUrl);
struct asObject *asObj = asParseText(annoRowBigWigAsText);
struct annoStreamBigWig *self = NULL;
AllocVar(self);

// Generic streamer setup, then bigWig-specific methods.
struct annoStreamer *base = &(self->streamer);
annoStreamerInit(base, aa, asObj, fileOrUrl);
base->rowType = arWig;
base->close = asbwClose;
base->nextRow = asbwNextRow;
base->setRegion = asbwSetRegion;

// Keep the open file handle and its chromosome list on the object.
self->bbi = bbi;
self->chromList = bbiChromList(bbi);
return (struct annoStreamer *)self;
}
Beispiel #4
0
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa)
/* Construct a bigWig annoStreamer (subclass) object for the given file or URL.
 * The autoSql description comes from annoStreamBigWigAsObject() and rows have
 * type arWigVec. */
{
struct bbiFile *bbi = bigWigFileOpen(fileOrUrl);
struct asObject *asObj = annoStreamBigWigAsObject();
struct annoStreamBigWig *self = NULL;
AllocVar(self);

struct annoStreamer *base = &(self->streamer);
annoStreamerInit(base, aa, asObj, fileOrUrl);
//#*** TODO: arWigSingle would be more memory-efficient for bedGraphs, but
//#*** annoGrateWig would first have to be updated to accept incoming arWigSingle.
base->rowType = arWigVec;
base->close = asbwClose;
base->nextRow = asbwNextRow;
base->setRegion = asbwSetRegion;

// Keep the open file handle and its chromosome list on the object.
self->bbi = bbi;
self->chromList = bbiChromList(bbi);
return (struct annoStreamer *)self;
}
Beispiel #5
0
void annoGratorInit(struct annoGrator *self, struct annoStreamer *mySource)
/* Set up self as an integrator of columns from mySource with (positions of)
 * rows passed to integrate().  The streamer part of self takes its assembly,
 * autoSql object, name and rowType from mySource.
 * mySource becomes property of the annoGrator. */
{
struct annoStreamer *base = &(self->streamer);
struct asObject *sourceAsObj = mySource->getAutoSqlObject(mySource);
annoStreamerInit(base, mySource->assembly, sourceAsObj, mySource->name);

// Streamer-level settings and method overrides.
base->rowType = mySource->rowType;
base->setAutoSqlObject = agSetAutoSqlObject;
base->setFilters = agSetFilters;
base->setRegion = annoGratorSetRegion;
base->nextRow = noNextRow;
base->close = annoGratorClose;

// Grator-level state and methods.
self->qLm = lmInit(0);
self->mySource = mySource;
self->integrate = annoGratorIntegrate;
self->setOverlapRule = agSetOverlapRule;
self->overlapRule = agoNoConstraint;
self->haveRJIncludeFilter = filtersHaveRJInclude(base->filters);
}
Beispiel #6
0
struct annoStreamer *annoStreamTabNew(char *fileOrUrl, struct annoAssembly *aa,
				      struct asObject *asObj)
/* Create an annoStreamer (subclass) object from a tab-separated text file/URL
 * whose columns are described by asObj (possibly excepting bin column at beginning). */
{
struct lineFile *lf = astLFOpen(fileOrUrl);
struct annoStreamTab *self = NULL;
AllocVar(self);
struct annoStreamer *streamer = &(self->streamer);
annoStreamerInit(streamer, aa, asObj, fileOrUrl);
streamer->rowType = arWords;
streamer->setRegion = astSetRegion;
streamer->nextRow = astNextRow;
streamer->close = astClose;
AllocArray(self->asWords, streamer->numCols);
self->lf = lf;
self->eof = FALSE;
self->fileOrUrl = cloneString(fileOrUrl);
if (!astInitBed3Fields(self))
    errAbort("annoStreamTabNew: can't figure out which fields of %s to use as "
	     "{chrom, chromStart, chromEnd}.", fileOrUrl);
return (struct annoStreamer *)self;

}
Beispiel #7
0
struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa,
				     struct asObject *asObj, int maxOutRows)
/* Construct an annoStreamer (subclass) object that reads database table
 * db.table, whose columns are described by asObj.  errAbort if the table
 * doesn't exist or if the {chrom, chromStart, chromEnd} fields can't be
 * determined.  maxOutRows is stored on the object and is enabled as a limit
 * only when it is > 0. */
{
struct sqlConnection *conn = hAllocConn(db);
if (!sqlTableExists(conn, table))
    errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db);
struct annoStreamDb *self = NULL;
AllocVar(self);
struct annoStreamer *base = &(self->streamer);

// The streamer is named "db.table"; +2 leaves room for the '.' and the terminator.
int dbTableSize = strlen(db) + strlen(table) + 2;
char dbTable[dbTableSize];
safef(dbTable, dbTableSize, "%s.%s", db, table);
annoStreamerInit(base, aa, asObj, dbTable);
base->rowType = arWords;
base->close = asdClose;
base->nextRow = asdNextRow;
base->setRegion = asdSetRegion;

self->conn = conn;
self->table = cloneString(table);

// Does the table start with a bin column, and if so, does asObj describe it too?
char *firstAsColumn = base->asObj->columnList->name;
if (sqlFieldIndex(self->conn, self->table, "bin") == 0)
    {
    self->hasBin = 1;
    self->minFinestBin = binFromRange(0, 1);
    }
if (self->hasBin && !sameString(firstAsColumn, "bin"))
    self->omitBin = 1;

if (!asdInitBed3Fields(self))
    errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as "
	     "{chrom, chromStart, chromEnd}.", db, table);
self->makeBaselineQuery = asdMakeBaselineQuery;
// An index on endField can tempt the query optimizer into using it, which
// ruins the sorting.  Fortunately most tables don't have one anymore.
self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField);
// Special case: genbank-updated tables are not sorted because new mappings are
// tacked on at the end.
self->notSorted = isIncrementallyUpdated(table) ? TRUE : FALSE;
self->mergeBins = FALSE;
self->maxOutRows = maxOutRows;
self->useMaxOutRows = (maxOutRows > 0);
self->needQuery = TRUE;
self->chromList = annoAssemblySeqNames(aa);
if (slCount(self->chromList) <= 1000)
    {
    // All-chromosome assembly -- if table is large, perform a series of
    // chunked queries.
    self->doQuery = asdDoQueryChunking;
    self->nextRowRaw = nextRowFromBuffer;
    }
else
    {
    // Assembly has many sequences (e.g. scaffold-based assembly) --
    // don't break up into per-sequence queries.  Take our chances
    // with mysql being unhappy about the sqlResult being open too long.
    self->doQuery = asdDoQuerySimple;
    self->nextRowRaw = nextRowFromSqlResult;
    }
return (struct annoStreamer *)self;
}