struct annoStreamer *annoStreamVcfNew(char *fileOrUrl, boolean isTabix, struct annoAssembly *aa,
				      int maxRecords)
/* Build an annoStreamer (subclass) that reads the VCF at fileOrUrl.  When isTabix
 * is set, the file is opened with the tabix opener; otherwise the plain VCF opener
 * is used.  errAborts if the file cannot be opened.  maxRecords is stored on the
 * new object.  The new object is returned cast to its annoStreamer base type. */
{
// Negative error limit: don't errAbort on VCF format warnings/errs.
const int vcfMaxErr = -1;
struct vcfFile *vcfHandle = isTabix
    ? vcfTabixFileMayOpen(fileOrUrl, NULL, 0, 0, vcfMaxErr, 0)
    : vcfFileMayOpen(fileOrUrl, vcfMaxErr, 0, FALSE);
if (!vcfHandle)
    errAbort("annoStreamVcfNew: unable to open VCF: '%s'", fileOrUrl);

struct annoStreamVcf *self = NULL;
AllocVar(self);
struct asObject *vcfAs = vcfAsObj();
struct annoStreamer *base = &(self->streamer);
annoStreamerInit(base, aa, vcfAs, fileOrUrl);

// Base-class row type and method table.
base->rowType = arWords;
base->setRegion = asvSetRegion;
base->getHeader = asvGetHeader;
base->nextRow = asvNextRow;
base->close = asvClose;

// Subclass-specific state.
self->vcff = vcfHandle;
self->isTabix = isTabix;
self->maxRecords = maxRecords;
self->dyGt = dyStringNew(1024);
self->chromNameHash = hashNew(0);
self->numCols = slCount(vcfAs->columnList);
// 8 file columns when there are no genotypes; otherwise 9 plus one per genotype.
self->numFileCols = (vcfHandle->genotypeCount > 0) ? 9 + vcfHandle->genotypeCount : 8;

return (struct annoStreamer *)self;
}
struct annoStreamer *annoStreamWigDbNew(char *db, char *table, struct annoAssembly *aa, int maxOutput) /* Create an annoStreamer (subclass) object from a wiggle database table. */ { struct annoStreamWig *self = NULL; AllocVar(self); self->wigStr = annoStreamDbNew(db, table, aa, asParseText(wiggleAsText), maxOutput); struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, aa, asParseText(annoRowWigAsText), self->wigStr->name); streamer->rowType = arWigVec; streamer->setRegion = aswSetRegion; streamer->nextRow = aswNextRow; streamer->close = aswClose; return (struct annoStreamer *)self; }
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa) /* Create an annoStreamer (subclass) object from a file or URL. */ { struct bbiFile *bbi = bigWigFileOpen(fileOrUrl); struct asObject *asObj = asParseText(annoRowBigWigAsText); struct annoStreamBigWig *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, aa, asObj, fileOrUrl); streamer->rowType = arWig; streamer->setRegion = asbwSetRegion; streamer->nextRow = asbwNextRow; streamer->close = asbwClose; self->chromList = bbiChromList(bbi); self->bbi = bbi; return (struct annoStreamer *)self; }
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa) /* Create an annoStreamer (subclass) object from a file or URL. */ { struct bbiFile *bbi = bigWigFileOpen(fileOrUrl); struct asObject *asObj = annoStreamBigWigAsObject(); struct annoStreamBigWig *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, aa, asObj, fileOrUrl); //#*** Would be more memory-efficient to do arWigSingle for bedGraphs. //#*** annoGrateWig would need to be updated to handle incoming arWigSingle. streamer->rowType = arWigVec; streamer->setRegion = asbwSetRegion; streamer->nextRow = asbwNextRow; streamer->close = asbwClose; self->chromList = bbiChromList(bbi); self->bbi = bbi; return (struct annoStreamer *)self; }
void annoGratorInit(struct annoGrator *self, struct annoStreamer *mySource) /* Initialize an integrator of columns from mySource with (positions of) * rows passed to integrate(). * mySource becomes property of the annoGrator. */ { struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, mySource->assembly, mySource->getAutoSqlObject(mySource), mySource->name); streamer->rowType = mySource->rowType; streamer->setAutoSqlObject = agSetAutoSqlObject; streamer->setFilters = agSetFilters; streamer->setRegion = annoGratorSetRegion; streamer->nextRow = noNextRow; streamer->close = annoGratorClose; self->qLm = lmInit(0); self->integrate = annoGratorIntegrate; self->setOverlapRule = agSetOverlapRule; self->overlapRule = agoNoConstraint; self->mySource = mySource; self->haveRJIncludeFilter = filtersHaveRJInclude(streamer->filters); }
struct annoStreamer *annoStreamTabNew(char *fileOrUrl, struct annoAssembly *aa, struct asObject *asObj) /* Create an annoStreamer (subclass) object from a tab-separated text file/URL * whose columns are described by asObj (possibly excepting bin column at beginning). */ { struct lineFile *lf = astLFOpen(fileOrUrl); struct annoStreamTab *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, aa, asObj, fileOrUrl); streamer->rowType = arWords; streamer->setRegion = astSetRegion; streamer->nextRow = astNextRow; streamer->close = astClose; AllocArray(self->asWords, streamer->numCols); self->lf = lf; self->eof = FALSE; self->fileOrUrl = cloneString(fileOrUrl); if (!astInitBed3Fields(self)) errAbort("annoStreamTabNew: can't figure out which fields of %s to use as " "{chrom, chromStart, chromEnd}.", fileOrUrl); return (struct annoStreamer *)self; }
struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa, struct asObject *asObj, int maxOutRows) /* Create an annoStreamer (subclass) object from a database table described by asObj. */ { struct sqlConnection *conn = hAllocConn(db); if (!sqlTableExists(conn, table)) errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db); struct annoStreamDb *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); int dbtLen = strlen(db) + strlen(table) + 2; char dbTable[dbtLen]; safef(dbTable, dbtLen, "%s.%s", db, table); annoStreamerInit(streamer, aa, asObj, dbTable); streamer->rowType = arWords; streamer->setRegion = asdSetRegion; streamer->nextRow = asdNextRow; streamer->close = asdClose; self->conn = conn; self->table = cloneString(table); char *asFirstColumnName = streamer->asObj->columnList->name; if (sqlFieldIndex(self->conn, self->table, "bin") == 0) { self->hasBin = 1; self->minFinestBin = binFromRange(0, 1); } if (self->hasBin && !sameString(asFirstColumnName, "bin")) self->omitBin = 1; if (!asdInitBed3Fields(self)) errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as " "{chrom, chromStart, chromEnd}.", db, table); self->makeBaselineQuery = asdMakeBaselineQuery; // When a table has an index on endField, sometimes the query optimizer uses it // and that ruins the sorting. Fortunately most tables don't anymore. self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField); self->notSorted = FALSE; // Special case: genbank-updated tables are not sorted because new mappings are // tacked on at the end. if (isIncrementallyUpdated(table)) self->notSorted = TRUE; self->mergeBins = FALSE; self->maxOutRows = maxOutRows; self->useMaxOutRows = (maxOutRows > 0); self->needQuery = TRUE; self->chromList = annoAssemblySeqNames(aa); if (slCount(self->chromList) > 1000) { // Assembly has many sequences (e.g. 
scaffold-based assembly) -- // don't break up into per-sequence queries. Take our chances // with mysql being unhappy about the sqlResult being open too long. self->doQuery = asdDoQuerySimple; self->nextRowRaw = nextRowFromSqlResult; } else { // All-chromosome assembly -- if table is large, perform a series of // chunked queries. self->doQuery = asdDoQueryChunking; self->nextRowRaw = nextRowFromBuffer; } return (struct annoStreamer *)self; }