Пример #1
0
void doVcfDetailsCore(struct trackDb *tdb, char *fileOrUrl, boolean isTabix)
/* Show item details using fileOrUrl. */
{
genericHeader(tdb, NULL);
int start = cartInt(cart, "o");
int end = cartInt(cart, "t");
int vcfMaxErr = -1;
struct vcfFile *vcff = NULL;
/* protect against temporary network or parsing error */
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
    {
    if (isTabix)
	vcff = vcfTabixFileMayOpen(fileOrUrl, seqName, start, end, vcfMaxErr, -1);
    else
	vcff = vcfFileMayOpen(fileOrUrl, seqName, start, end, vcfMaxErr, -1, TRUE);
    }
errCatchEnd(errCatch);
if (errCatch->gotError)
    {
    if (isNotEmpty(errCatch->message->string))
	warn("%s", errCatch->message->string);
    }
errCatchFree(&errCatch);
if (vcff != NULL)
    {
    struct vcfRecord *rec;
    for (rec = vcff->records;  rec != NULL;  rec = rec->next)
	if (rec->chromStart == start && rec->chromEnd == end) // in pgSnp mode, don't get name
	    vcfRecordDetails(tdb, rec);
    }
else
    printf("Sorry, unable to open %s<BR>\n", fileOrUrl);
printTrackHtml(tdb);
}
Пример #2
0
struct annoStreamer *annoStreamVcfNew(char *fileOrUrl, boolean isTabix, struct annoAssembly *aa,
				      int maxRecords)
/* Create an annoStreamer (subclass) object from a VCF file, which may
 * or may not have been compressed and indexed by tabix. */
{
int maxErr = -1; // don't errAbort on VCF format warnings/errs
struct vcfFile *vcff;
if (isTabix)
    vcff = vcfTabixFileMayOpen(fileOrUrl, NULL, 0, 0, maxErr, 0);
else
    vcff = vcfFileMayOpen(fileOrUrl, maxErr, 0, FALSE);
if (vcff == NULL)
    errAbort("annoStreamVcfNew: unable to open VCF: '%s'", fileOrUrl);
struct annoStreamVcf *self;
AllocVar(self);
struct annoStreamer *streamer = &(self->streamer);
struct asObject *asObj = vcfAsObj();
annoStreamerInit(streamer, aa, asObj, fileOrUrl);
streamer->rowType = arWords;
streamer->setRegion = asvSetRegion;
streamer->getHeader = asvGetHeader;
streamer->nextRow = asvNextRow;
streamer->close = asvClose;
self->vcff = vcff;
self->dyGt = dyStringNew(1024);
self->chromNameHash = hashNew(0);
self->isTabix = isTabix;
self->numCols = slCount(asObj->columnList);
self->numFileCols = 8;
if (vcff->genotypeCount > 0)
    self->numFileCols = 9 + vcff->genotypeCount;
self->maxRecords = maxRecords;
return (struct annoStreamer *)self;
}
Пример #3
0
static void vcfTabixLoadItems(struct track *tg)
/* Load items in window from VCF file using its tabix index file. */
{
char *fileOrUrl = NULL;
/* Figure out url or file name. */
if (tg->parallelLoading)
    {
    /* do not use mysql during parallel-fetch load */
    fileOrUrl = trackDbSetting(tg->tdb, "bigDataUrl");
    }
else
    {
    struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
    fileOrUrl = bbiNameFromSettingOrTableChrom(tg->tdb, conn, tg->table, chromName);
    hFreeConn(&conn);
    }
if (isEmpty(fileOrUrl))
    return;
int vcfMaxErr = -1;
struct vcfFile *vcff = NULL;
boolean hapClustEnabled = cartOrTdbBoolean(cart, tg->tdb, VCF_HAP_ENABLED_VAR, TRUE);
/* protect against temporary network error */
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
    {
    vcff = vcfTabixFileMayOpen(fileOrUrl, chromName, winStart, winEnd, vcfMaxErr, -1);
    if (vcff != NULL)
	{
	filterRecords(vcff, tg->tdb);
	if (hapClustEnabled && vcff->genotypeCount > 1 && vcff->genotypeCount < 3000 &&
	    (tg->visibility == tvPack || tg->visibility == tvSquish))
	    vcfHapClusterOverloadMethods(tg, vcff);
	else
	    {
	    tg->items = vcfFileToPgSnp(vcff, tg->tdb);
	    // pgSnp bases coloring/display decision on count of items:
	    tg->customInt = slCount(tg->items);
	    }
	// Don't vcfFileFree here -- we are using its string pointers!
	}
    }
errCatchEnd(errCatch);
if (errCatch->gotError || vcff == NULL)
    {
    if (isNotEmpty(errCatch->message->string))
	tg->networkErrMsg = cloneString(errCatch->message->string);
    tg->drawItems = bigDrawWarning;
    tg->totalHeight = bigWarnTotalHeight;
    }
errCatchFree(&errCatch);
}
Пример #4
0
static void addFilteredBedsOnRegion(char *fileName, struct region *region, char *table,
				    struct asFilter *filter, struct lm *bedLm,
				    struct bed **pBedList, struct hash *idHash, int *pMaxOut,
				    boolean isTabix)
/* Add relevant beds in reverse order to pBedList */
{
struct vcfFile *vcff;
if (isTabix)
    vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
			       100, *pMaxOut);
else
    vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
			  100, *pMaxOut, TRUE);
if (vcff == NULL)
    noWarnAbort();
struct lm *lm = lmInit(0);
char *row[VCFDATALINE_NUM_COLS];
char numBuf[VCF_NUM_BUF_SIZE];
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (rec = vcff->records;  rec != NULL;  rec = rec->next)
    {
    vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
    if (asFilterOnRow(filter, row))
        {
	if ((idHash != NULL) && (hashLookup(idHash, rec->name) == NULL))
	    continue;
	struct bed *bed;
	lmAllocVar(bedLm, bed);
	bed->chrom = lmCloneString(bedLm, region->chrom);
	bed->chromStart = rec->chromStart;
	bed->chromEnd = rec->chromEnd;
	bed->name = lmCloneString(bedLm, rec->name);
	slAddHead(pBedList, bed);
	}
    (*pMaxOut)--;
    if (*pMaxOut <= 0)
	break;
    }
dyStringFree(&dyAlt);  dyStringFree(&dyFilter);  dyStringFree(&dyInfo);  dyStringFree(&dyGt);
lmCleanup(&lm);
vcfFileFree(&vcff);
}
Пример #5
0
static boolean allelesExists(struct section *section,
        struct sqlConnection *conn, char *geneId)
// Return TRUE if common haplotype alleles exist.
{
// If there was a reset then clear out settings
if (cartUsualBoolean(cart, HAPLO_RESET_ALL, FALSE))
    cartRemovePrefix(cart,HAPLO_TABLE "_" );

// Start with the default variables for haplotype retrieval
struct haploExtras *he = haplotypeExtrasDefault(database, 0);
section->extras = he;

// Need to get genePred struct from geneId
char where[128];
safef(where,sizeof(where),"name = '%s'",geneId);
struct genePred *gp = genePredReaderLoadQuery(conn,he->geneTable, where);
if (gp == NULL || gp->cdsStart == gp->cdsEnd)  // Ain't interested in non-protein coding genes
    {
    haplotypeExtrasFree(&he);
    return FALSE;
    }

he->chrom = gp->chrom; // Probably not needed
he->justModel = lmCloneString(he->lm,geneId);
//he->growTree = FALSE; // Tree growing not needed here

// Need to determine the correct vcf file and open it
if (haplotypesDiscoverVcfFile(he, gp->chrom) == NULL)
    {
    haplotypeExtrasFree(&he);
    return FALSE;
    }

struct vcfFile *vcff = vcfTabixFileMayOpen(he->inFile, NULL, 0, 0,VCF_IGNORE_ERRS, 0);
if (vcff == NULL)
    {
    haplotypeExtrasFree(&he);
    return FALSE;
    }
vcfFileMakeReusePool(vcff,1024 * 1024);

// All or Limit to the 99%
boolean rareVars =  cartUsualBoolean(cart, HAPLO_RARE_VAR, FALSE);
if (rareVars)
    he->synonymous = TRUE;
else
    he->variantMinPct = HAPLO_COMMON_VARIANT_MIN_PCT;

// Lets show the population distribution
he->populationsToo   = cartUsualBoolean(cart, HAPLO_MAJOR_DIST, FALSE);
he->populationsMinor = cartUsualBoolean(cart, HAPLO_MINOR_DIST, FALSE);
if (he->populationsToo)
    he->populationMinPct = 5;
else if (he->populationsMinor)
    {
    he->populationsMinor = FALSE;
    cartRemove(cart, HAPLO_MINOR_DIST );
    }

// Need to generate haplotypes
section->items = geneHapSetForOneModel(he,vcff,gp,TRUE);

return TRUE;// (section->items != NULL);
}
Пример #6
0
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
	       boolean isTabix)
/* Print out selected fields from VCF.  If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = vcfGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
	idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

// If we are outputting a subset of fields, invalidate the VCF header.
boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
if (!allFields)
    fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");

struct asObject *as = vcfAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    filter = asFilterFromCart(cart, db, table, as);

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
// Include the header, absolutely necessary for VCF parsing.
boolean printedHeader = FALSE;
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    char *fileName = vcfFileName(tdb, conn, table, region->chrom);
    struct vcfFile *vcff;
    if (isTabix)
	vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
				   100, maxOut);
    else
	vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
			      100, maxOut, TRUE);
    if (vcff == NULL)
	noWarnAbort();
    // If we are outputting all fields, but this VCF has no genotype info, omit the
    // genotype columns from output:
    if (allFields && vcff->genotypeCount == 0)
	fieldCount = VCFDATALINE_NUM_COLS - 2;
    if (!printedHeader)
	{
	fprintf(f, "%s", vcff->headerString);
	if (filter)
	    fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	if (!allFields)
	    {
	    fprintf(f, "#%s", fieldArray[0]);
	    for (i=1; i<fieldCount; ++i)
		fprintf(f, "\t%s", fieldArray[i]);
	    fprintf(f, "\n");
	    }
	printedHeader = TRUE;
	}
    char *row[VCFDATALINE_NUM_COLS];
    char numBuf[VCF_NUM_BUF_SIZE];
    for (rec = vcff->records;  rec != NULL && (maxOut > 0);  rec = rec->next)
        {
	vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
	if (asFilterOnRow(filter, row))
	    {
	    /* if we're looking for identifiers, check if this matches */
	    if ((idHash != NULL) && (hashLookup(idHash, row[idFieldNum]) == NULL))
		continue;
	    // All fields output: after asFilter'ing, preserve original VCF chrom
	    if (allFields && !sameString(rec->chrom, region->chrom))
		row[0] = rec->chrom;
	    int i;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1; i<fieldCount; ++i)
		{
		fprintf(f, "\t%s", row[columnArray[i]]);
		}
	    fprintf(f, "\n");
	    maxOut --;
	    }
	}
    vcfFileFree(&vcff);
    freeMem(fileName);
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
dyStringFree(&dyAlt);  dyStringFree(&dyFilter);  dyStringFree(&dyInfo);  dyStringFree(&dyGt);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}