void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly, struct edwFile *elderEf, struct edwValidFile *elderVf, struct edwFile *youngerEf, struct edwValidFile *youngerVf) /* Do correlation analysis between elder and younger and save result to * a new edwQaPairCorrelation record. Do this for a format where we have a bigWig file. */ { if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation")) return; char *enrichedIn = elderVf->enrichedIn; if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown")) { struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn); /* Get open big wig files for both younger and older. */ char *elderPath = edwPathForFileId(conn, elderEf->id); char *youngerPath = edwPathForFileId(conn, youngerEf->id); struct bbiFile *elderBbi = bigWigFileOpen(elderPath); struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath); /* Figure out thresholds */ double elderThreshold = twoStdsOverMean(elderBbi); double youngerThreshold = twoStdsOverMean(youngerBbi); /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/ struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi); struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew(); struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew(); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi, elderThreshold, youngerThreshold, c, cInEnriched, cClipped); } /* Make up correlation structure . */ struct edwQaPairCorrelation *cor; AllocVar(cor); cor->elderFileId = elderVf->fileId; cor->youngerFileId = youngerVf->fileId; cor->pearsonOverall = correlateResult(c); cor->pearsonInEnriched = correlateResult(cInEnriched); cor->pearsonClipped = correlateResult(cClipped); edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128); bigWigValsOnChromFree(&bVals); bigWigValsOnChromFree(&aVals); genomeRangeTreeFree(&targetGrt); freez(&cor); correlateFree(&c); bigWigFileClose(&youngerBbi); bigWigFileClose(&elderBbi); freez(&youngerPath); freez(&elderPath); } }
void bigWigCorrelate(char *aFileName, char *bFileName) /* bigWigCorrelate - Correlate bigWig files, optionally only on target regions.. */ { struct genomeRangeTree *targetGrt = NULL; if (restrictFile) targetGrt = grtFromBigBed(restrictFile); struct bbiFile *aBbi = bigWigFileOpen(aFileName); struct bbiFile *bBbi = bigWigFileOpen(bFileName); struct correlate *c = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(aBbi); struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew(); struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew(); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBwCorrelations(chrom, targetGrt, aVals, bVals, aBbi, bBbi, threshold, threshold, c); } printf("%g\n", correlateResult(c)); }
void averageFetchingEachChrom(struct bbiFile *bbi, struct bed **pBedList, int fieldCount, FILE *f, FILE *bedF) /* Do the averaging by sorting bedList by chromosome, and then processing each chromosome * at once. Faster for long bedLists. */ { /* Sort by chromosome. */ slSort(pBedList, bedCmpChrom); struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew(); struct bed *bed, *bedList, *nextChrom; verbose(1, "processing chromosomes"); for (bedList = *pBedList; bedList != NULL; bedList = nextChrom) { /* Figure out which chromosome we're working on, and the last bed using it. */ char *chrom = bedList->chrom; nextChrom = nextChromInList(bedList); verbose(2, "Processing %s\n", chrom); if (bigWigValsOnChromFetchData(chromVals, chrom, bbi)) { double *valBuf = chromVals->valBuf; Bits *covBuf = chromVals->covBuf; /* Loop through beds doing sums and outputting. */ for (bed = bedList; bed != nextChrom; bed = bed->next) { int size = 0, coverage = 0; double sum = 0.0; if (sampleAroundCenter > 0) { int center = (bed->chromStart + bed->chromEnd)/2; int left = center - (sampleAroundCenter/2); addBufIntervalInfo(valBuf, covBuf, left, left+sampleAroundCenter, &size, &coverage, &sum); } else { if (fieldCount < 12) { addBufIntervalInfo(valBuf, covBuf, bed->chromStart, bed->chromEnd, &size, &coverage, &sum); } else { int i; for (i=0; i<bed->blockCount; ++i) { int start = bed->chromStart + bed->chromStarts[i]; int end = start + bed->blockSizes[i]; addBufIntervalInfo(valBuf, covBuf, start, end, &size, &coverage, &sum); } } } /* Print out result, fudging mean to 0 if no coverage at all. */ double mean = 0; if (coverage > 0) mean = sum/coverage; fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, sum/size, mean); optionallyPrintBedPlus(bedF, bed, fieldCount, mean); } verboseDot(); } else { /* If no bigWig data on this chromosome, just output as if coverage is 0 */ for (bed = bedList; bed != nextChrom; bed = bed->next) { fprintf(f, "%s\t%d\t0\t0\t0\t0\n", bed->name, bedTotalBlockSize(bed)); optionallyPrintBedPlus(bedF, bed, fieldCount, 0); } } } verbose(1, "\n"); }
void doEnrichmentsFromBigWig(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf, struct cdwAssembly *assembly, struct target *targetList) /* Figure out enrichments from a bigBed file. */ { /* Get path to bigBed, open it, and read all chromosomes. */ char *bigWigPath = cdwPathForFileId(conn, ef->id); struct bbiFile *bbi = bigWigFileOpen(bigWigPath); struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi); struct bigWigValsOnChrom *valsOnChrom = bigWigValsOnChromNew(); /* This takes a while, so let's figure out what parts take the time. */ long totalBigQueryTime = 0; long totalOverlapTime = 0; /* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases * for all targets. This is complicated by just wanting to keep one chromosome worth of * bigWig data in memory. Also just for performance we do a lookup of target range tree to * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */ for (chrom = chromList; chrom != NULL; chrom = chrom->next) { long startBigQueryTime = clock1000(); boolean gotData = bigWigValsOnChromFetchData(valsOnChrom, chrom->name, bbi); long endBigQueryTime = clock1000(); totalBigQueryTime += endBigQueryTime - startBigQueryTime; if (gotData) { double *valBuf = valsOnChrom->valBuf; Bits *covBuf = valsOnChrom->covBuf; /* Loop through all targets adding overlaps from ivList */ long startOverlapTime = clock1000(); struct target *target; for (target = targetList; target != NULL; target = target->next) { if (target->skip) continue; struct genomeRangeTree *grt = target->grt; struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name); if (targetTree != NULL) { struct range *range, *rangeList = rangeTreeList(targetTree); for (range = rangeList; range != NULL; range = range->next) { int s = range->start, e = range->end, i; for (i=s; i<=e; ++i) { if (bitReadOne(covBuf, i)) { double x = valBuf[i]; target->uniqOverlapBases += 1; target->overlapBases += x; } } } } } long endOverlapTime = clock1000(); totalOverlapTime += endOverlapTime - startOverlapTime; } } verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime); /* Now loop through targets and save enrichment info to database */ struct target *target; for (target = targetList; target != NULL; target = target->next) { if (target->skip) continue; struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, target->overlapBases, target->uniqOverlapBases); cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128); cdwQaEnrichFree(&enrich); } bigWigValsOnChromFree(&valsOnChrom); bbiChromInfoFreeList(&chromList); bigWigFileClose(&bbi); freez(&bigWigPath); }
void regCompanionChia(char *inBedPairs, char *output) /* regCompanionChia - Analyse chia pet data against promoters and enhancers.. */ { inArray[0].fileName = inBedPairs; checkInputOpenFiles(inArray, ArraySize(inArray)); verbose(1, "Opened all %d inputs successfully\n", (int)ArraySize(inArray)); struct bed *chiaList = bedLoadTwoBlocks(inArray[0].lf); slSort(&chiaList, bedCmp); int chiaCount = slCount(chiaList); struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew(); int inIx; for (inIx=1; inIx < ArraySize(inArray); ++inIx) { /* Allocate output arrays. */ struct inInfo *in = &inArray[inIx]; double *out1 = AllocArray(in->out[0], chiaCount); double *out2 = AllocArray(in->out[1], chiaCount); /* Process input depending on type. */ verbose(1, "Processing %s", in->fileName); switch(in->type) { case itPromoterBed: doItPromoterBed(in, chiaList, out1, out2); break; case itUnstrandedBed: doItUnstrandedBed(in, chiaList, out1, out2); break; case itBigWig: doItBigWig(in, chiaList, chromVals, out1, out2); break; default: internalErr(); break; } verbose(1, "\n"); } /* do output */ FILE *f = mustOpen(output, "w"); struct bed *chia; int chiaIx = 0; for (chia = chiaList; chia != NULL; chia = chia->next, ++chiaIx) { // fprintf(f, "%s\t%d\t%d\tchia%d\t%d", chia->chrom, chia->chromStart, chia->chromEnd, chiaIx+1, chia->score); fprintf(f, "%s\t%d\t%d\t%s\t%d", chia->chrom, chia->chromStart, chia->chromEnd, chia->name, chia->score); int blockIx; for (blockIx=0; blockIx < 2; ++blockIx) { fprintf(f, "\tblock%d\t%d", blockIx+1, chia->blockSizes[blockIx]); for (inIx=1; inIx < ArraySize(inArray); ++inIx) { struct inInfo *in = &inArray[inIx]; double *out = in->out[blockIx]; fprintf(f, "\t%g", out[chiaIx]); } } fprintf(f, "\n"); } carefulClose(&f); }