Пример #1
0
void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Do correlation analysis between elder and younger and save result to
 * a new edwQaPairCorrelation record. Do this for a format where we have a bigWig file. */
{
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    return;
char *enrichedIn = elderVf->enrichedIn;
if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown"))
    {
    struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn);

    /* Get open big wig files for both younger and older. */
    char *elderPath = edwPathForFileId(conn, elderEf->id);
    char *youngerPath = edwPathForFileId(conn, youngerEf->id);
    struct bbiFile *elderBbi = bigWigFileOpen(elderPath);
    struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath);

    /* Figure out thresholds */
    double elderThreshold = twoStdsOverMean(elderBbi);
    double youngerThreshold = twoStdsOverMean(youngerBbi);

    /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/
    struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew();
    struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi);
    struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew();
    struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew();
    for (chrom = chromList; chrom != NULL; chrom = chrom->next)
        {
	addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi, 
	    elderThreshold, youngerThreshold, c, cInEnriched, cClipped);
	}

    /* Make up correlation structure . */
    struct edwQaPairCorrelation *cor;
    AllocVar(cor);
    cor->elderFileId = elderVf->fileId;
    cor->youngerFileId = youngerVf->fileId;
    cor->pearsonOverall = correlateResult(c);
    cor->pearsonInEnriched = correlateResult(cInEnriched);
    cor->pearsonClipped = correlateResult(cClipped);
    edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128);


    bigWigValsOnChromFree(&bVals);
    bigWigValsOnChromFree(&aVals);
    genomeRangeTreeFree(&targetGrt);
    freez(&cor);
    correlateFree(&c);
    bigWigFileClose(&youngerBbi);
    bigWigFileClose(&elderBbi);
    freez(&youngerPath);
    freez(&elderPath);
    }
}
Пример #2
0
void metaBigClose(struct metaBig** pMb)
/* Close the underlying file and free everything owned by the metaBig, then release
 * the struct itself through freez(pMb).  Safe to call with a NULL pMb or when *pMb
 * is NULL (for example on an already-closed handle); in that case nothing is done. */
{
    if (pMb == NULL || *pMb == NULL)
        return;
    struct metaBig* mb = *pMb;
    hashFree(&mb->chromSizeHash);
    if (mb->rgList)
        hashFree(&mb->rgList);
    if (mb->sections)
        bedFreeList(&mb->sections);
    if (mb->originalFileName)
        freeMem(mb->originalFileName);
    if (mb->fileName)
        freeMem(mb->fileName);
    if (mb->baseFileName)
        freeMem(mb->baseFileName);
    if (mb->remoteSiteAndDir)
        freeMem(mb->remoteSiteAndDir);
#ifdef USE_HTSLIB
    if (mb->idx)
        hts_idx_destroy(mb->idx);
#endif
    /* Close the handle that matches the file type.  The BAM branch only exists when
     * built with htslib; without it every non-bigBed type falls through to the
     * bigWig close below. */
    if (mb->type == isaBigBed)
        bigBedFileClose(&mb->big.bbi);
#ifdef USE_HTSLIB
    else if (mb->type == isaBam)
        sam_close(mb->big.bam);
#endif
    else
        bigWigFileClose(&mb->big.bbi);
#ifdef USE_HTSLIB
    if (mb->header)
        bam_hdr_destroy(mb->header);
#endif
    freez(pMb);
}
Пример #3
0
static void asbwClose(struct annoStreamer **pVSelf)
/* Tear down a bigWig annoStreamer: close its bbi handle, clean up the local memory
 * used for interval queries, and free the streamer through annoStreamerFree.
 * A NULL pVSelf is ignored. */
{
if (pVSelf != NULL)
    {
    struct annoStreamBigWig *bwStreamer = *(struct annoStreamBigWig **)pVSelf;
    bigWigFileClose(&bwStreamer->bbi);
    /* Only the reference is dropped here; the list itself is presumably owned by
     * intervalQueryLm, which is cleaned up on the next line - TODO confirm. */
    bwStreamer->intervalList = NULL;
    lmCleanup(&bwStreamer->intervalQueryLm);
    annoStreamerFree(pVSelf);
    }
}
Пример #4
0
char *printBigWigViewInfo(FILE *f, char *indent, struct view *view, 
    struct composite *comp, struct taggedFile *tfList)
/* Print out info for a bigWig view, including subtracks.
 * Every file in tfList whose manifest outputType equals view->name is opened and its
 * total summary is pooled; the upper display limit is the pooled mean plus six
 * standard deviations.  Each output line is prefixed with indent.  Returns a
 * cloneString() copy of the "bigWig 0 <clipMax>" type string.  comp is not used. */
{
/* Look at all tracks in this view and calculate overall limits. */
double sumOfSums = 0, sumOfSumSquares = 0;
bits64 sumOfN = 0;
struct taggedFile *tf;
for (tf = tfList; tf != NULL; tf = tf->next)
    {
    if (sameString(tf->manifest->outputType, view->name))
	{
	char *path = tf->manifest->fileName;
	struct bbiFile *bbi = bigWigFileOpen(path);
	struct bbiSummaryElement sum = bbiTotalSummary(bbi);
	sumOfSums += sum.sumData;
	sumOfSumSquares += sum.sumSquares;
	/* Accumulate the count like the sums above.  Plain assignment kept only the
	 * last file's count, which skewed mean and std when a view has several files. */
	sumOfN += sum.validCount;
	bigWigFileClose(&bbi);
	}
    }
/* NOTE(review): if no file matches the view, sumOfN stays 0 and the mean is a 0/0
 * division - confirm callers never pass a view without files. */
double mean = sumOfSums/sumOfN;
double std = calcStdFromSums(sumOfSums, sumOfSumSquares, sumOfN);
double clipMax = mean + 6*std;

/* Output view stanza. */
char type[64];
safef(type, sizeof(type), "bigWig %g %g", 0.0, clipMax);
fprintf(f, "%stype %s\n", indent, type);
fprintf(f, "%sviewLimits 0:%g\n", indent, clipMax);
fprintf(f, "%sminLimit 0\n", indent);
fprintf(f, "%smaxLimit %g\n", indent, clipMax);
fprintf(f, "%sautoScale off\n", indent);
fprintf(f, "%smaxHeightPixels 100:32:16\n", indent);
fprintf(f, "%swindowingFunction mean+whiskers\n", indent);
return cloneString(type);
}
Пример #5
0
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigWig file.
 * For every target in targetList that is not marked skip, accumulates into
 * target->uniqOverlapBases the number of covered bases inside the target's ranges and
 * into target->overlapBases the sum of the bigWig values at those bases, then saves one
 * cdwQaEnrich record per such target to the cdwQaEnrich table. */
{
/* Get path to bigWig, open it, and read all chromosomes. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
struct bigWigValsOnChrom *valsOnChrom = bigWigValsOnChromNew();

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigWig data in memory. Also just for performance we do a lookup of target range tree to
 * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    long startBigQueryTime = clock1000();
    boolean gotData = bigWigValsOnChromFetchData(valsOnChrom, chrom->name, bbi);
    long endBigQueryTime = clock1000();
    totalBigQueryTime += endBigQueryTime - startBigQueryTime;
    if (gotData)
	{
	double *valBuf = valsOnChrom->valBuf;
	Bits *covBuf = valsOnChrom->covBuf;

	/* Loop through all targets adding overlaps from the per-base buffers. */
	long startOverlapTime = clock1000();
	struct target *target;
	for (target = targetList; target != NULL; target = target->next)
	    {
	    if (target->skip)
		continue;
	    struct genomeRangeTree *grt = target->grt;
	    struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	    if (targetTree != NULL)
		{
		struct range *range, *rangeList = rangeTreeList(targetTree);
		for (range = rangeList; range != NULL; range = range->next)
		    {
		    /* Ranges are zero-based half-open, so end is excluded.  Using
		     * i<=e counted one base too many per range and could index one
		     * past the buffers for a range ending at the chromosome end. */
		    int s = range->start, e = range->end, i;
		    for (i=s; i<e; ++i)
		        {
			if (bitReadOne(covBuf, i))
			    {
			    double x = valBuf[i];
			    target->uniqOverlapBases += 1;
			    target->overlapBases += x;
			    }
			}
		    }
		}
	    }
	long endOverlapTime = clock1000();
	totalOverlapTime += endOverlapTime - startOverlapTime;
	}
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

bigWigValsOnChromFree(&valsOnChrom);
bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}
Пример #6
0
/* Older, interval-list based implementation; noted as roughly 3 times as slow. */
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigWig file.  For each target, adds the size of the
 * overlap between the target's ranges and every bigWig interval to
 * target->uniqOverlapBases, and that overlap weighted by the interval value to
 * target->overlapBases.  One cdwQaEnrich record per target is then saved. */
{
/* Locate and open the bigWig, and get its chromosome list. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chromList = bbiChromList(bbi);

/* Running totals of clock1000() time spent in each phase, reported below. */
long bigQueryElapsed = 0, overlapElapsed = 0;

/* Handle one chromosome at a time so that only one chromosome worth of intervals is
 * held in memory; each pass gets its own local memory pool. */
struct bbiChromInfo *chrom;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    struct lm *lm = lmInit(0);

    /* Fetch all intervals on this chromosome, timing the query. */
    long queryStart = clock1000();
    struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom->name, 0, chrom->size, lm);
    bigQueryElapsed += clock1000() - queryStart;

    /* Add the overlap of every interval with every target that has ranges here. */
    long overlapStart = clock1000();
    struct target *tg;
    for (tg = targetList; tg != NULL; tg = tg->next)
	{
	/* Look up the chromosome-specific tree once, outside the interval loop. */
	struct rbTree *chromTree = genomeRangeTreeFindRangeTree(tg->grt, chrom->name);
	if (chromTree == NULL)
	    continue;
	struct bbiInterval *iv;
	for (iv = ivList; iv != NULL; iv = iv->next)
	    {
	    int overlap = rangeTreeOverlapSize(chromTree, iv->start, iv->end);
	    tg->uniqOverlapBases += overlap;
	    tg->overlapBases += overlap * iv->val;
	    }
	}
    overlapElapsed += clock1000() - overlapStart;

    lmCleanup(&lm);
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*bigQueryElapsed, 0.001*overlapElapsed);

/* Turn the accumulated overlaps into enrichment records and store them. */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}