void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly, struct edwFile *elderEf, struct edwValidFile *elderVf, struct edwFile *youngerEf, struct edwValidFile *youngerVf) /* Do correlation analysis between elder and younger and save result to * a new edwQaPairCorrelation record. Do this for a format where we have a bigWig file. */ { if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation")) return; char *enrichedIn = elderVf->enrichedIn; if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown")) { struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn); /* Get open big wig files for both younger and older. */ char *elderPath = edwPathForFileId(conn, elderEf->id); char *youngerPath = edwPathForFileId(conn, youngerEf->id); struct bbiFile *elderBbi = bigWigFileOpen(elderPath); struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath); /* Figure out thresholds */ double elderThreshold = twoStdsOverMean(elderBbi); double youngerThreshold = twoStdsOverMean(youngerBbi); /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/ struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi); struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew(); struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew(); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi, elderThreshold, youngerThreshold, c, cInEnriched, cClipped); } /* Make up correlation structure . 
*/ struct edwQaPairCorrelation *cor; AllocVar(cor); cor->elderFileId = elderVf->fileId; cor->youngerFileId = youngerVf->fileId; cor->pearsonOverall = correlateResult(c); cor->pearsonInEnriched = correlateResult(cInEnriched); cor->pearsonClipped = correlateResult(cClipped); edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128); bigWigValsOnChromFree(&bVals); bigWigValsOnChromFree(&aVals); genomeRangeTreeFree(&targetGrt); freez(&cor); correlateFree(&c); bigWigFileClose(&youngerBbi); bigWigFileClose(&elderBbi); freez(&youngerPath); freez(&elderPath); } }
double correlateArrays(double *x, double *y, int size)
/* Return correlation of two arrays of doubles.  The first size elements of x and y
 * are fed pairwise, in index order, into a fresh correlator which is freed
 * before returning. */
{
struct correlate *acc = correlateNew();
double result;
int ix = 0;

while (ix < size)
    {
    correlateNext(acc, x[ix], y[ix]);
    ++ix;
    }
result = correlateResult(acc);
correlateFree(&acc);
return result;
}
void bigWigCorrelate(char *aFileName, char *bFileName) /* bigWigCorrelate - Correlate bigWig files, optionally only on target regions.. */ { struct genomeRangeTree *targetGrt = NULL; if (restrictFile) targetGrt = grtFromBigBed(restrictFile); struct bbiFile *aBbi = bigWigFileOpen(aFileName); struct bbiFile *bBbi = bigWigFileOpen(bFileName); struct correlate *c = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(aBbi); struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew(); struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew(); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBwCorrelations(chrom, targetGrt, aVals, bVals, aBbi, bBbi, threshold, threshold, c); } printf("%g\n", correlateResult(c)); }
double correlatePair(struct metaWig *a, struct metaWig *b) /* Correlate a pair of metaWigs. */ { struct correlate *c = correlateNew(); struct slName *chrom, *chromList = metaWigChromList(a); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { struct lm *lm = lmInit(0); struct bbiInterval *aList = metaIntervalsForChrom(a, chrom->name, lm); struct bbiInterval *bList = metaIntervalsForChrom(b, chrom->name, lm); verbose(2, "%s a(%d) b(%d)\n", chrom->name, slCount(aList), slCount(bList)); sortedApplyOverlapping(aList, bList, slListNextWrapper, bbiIntervalCmpEnd, bbiIntervalOverlap, bbiIntervalCorrelatePairWrapper, c); lmCleanup(&lm); } slFreeList(&chromList); double result = correlateResult(c); verbose(2, "correlate: r %g, sumX %g, sumY %g, n %lld\n", result, c->sumX, c->sumY, c->n); correlateFree(&c); return result; }
double chromGraphBinCorrelate(char *aFile, char *bFile)
/* Do correlation between two graphs.  Chromosomes are taken from the first
 * file's list; a chromosome contributes only when the seek in the second
 * file succeeds.
 * NOTE(review): the two handles returned by chromGraphBinOpen() are never
 * released in this function - confirm whether they should be freed here. */
{
struct chromGraphBin *aBin = chromGraphBinOpen(aFile);
struct chromGraphBin *bBin = chromGraphBinOpen(bFile);
struct correlate *acc = correlateNew();
struct cgbChrom *cur = aBin->chromList;
double result;

while (cur != NULL)
    {
    /* Position both files on this chromosome before correlating it. */
    chromGraphBinSeekToChrom(aBin, cur->name);
    if (chromGraphBinSeekToChrom(bBin, cur->name))
	correlateChrom(aBin, bBin, acc);
    cur = cur->next;
    }
result = correlateResult(acc);
correlateFree(&acc);
return result;
}
void doBigBedReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly, struct edwFile *elderEf, struct edwValidFile *elderVf, struct edwFile *youngerEf, struct edwValidFile *youngerVf) /* Do correlation analysis between elder and younger and save result to * a new edwQaPairCorrelation record. Do this for a format where we have a bigBed file. */ { /* If got both pairs, work is done already */ if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap") && pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation")) return; int numColIx = 0; if (sameString(format, "narrowPeak") || sameString(format, "broadPeak")) numColIx = 6; // signalVal else numColIx = 4; // score numColIx -= 3; // Subtract off chrom/start/end char *enrichedIn = elderVf->enrichedIn; struct genomeRangeTree *targetGrt = NULL; if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown")) targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn); /* Get open big bed files for both younger and older. */ char *elderPath = edwPathForFileId(conn, elderEf->id); char *youngerPath = edwPathForFileId(conn, youngerEf->id); struct bbiFile *elderBbi = bigBedFileOpen(elderPath); struct bbiFile *youngerBbi = bigBedFileOpen(youngerPath); /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/ struct correlate *c = correlateNew(), *cInEnriched = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi); long long elderTotalSpan = 0, youngerTotalSpan = 0, overlapTotalSpan = 0; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBbCorrelations(chrom, targetGrt, elderBbi, youngerBbi, numColIx, c, cInEnriched, &elderTotalSpan, &youngerTotalSpan, &overlapTotalSpan); } /* Make up correlation structure and save. 
*/ if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation")) { struct edwQaPairCorrelation *cor; AllocVar(cor); cor->elderFileId = elderVf->fileId; cor->youngerFileId = youngerVf->fileId; cor->pearsonOverall = correlateResult(c); cor->pearsonInEnriched = correlateResult(cInEnriched); edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128); freez(&cor); } /* Also make up sample structure and save. */ if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap")) { struct edwQaPairSampleOverlap *sam; AllocVar(sam); sam->elderFileId = elderVf->fileId; sam->youngerFileId = youngerVf->fileId; sam->elderSampleBases = elderTotalSpan; sam->youngerSampleBases = youngerTotalSpan; sam->sampleOverlapBases = overlapTotalSpan; setSampleSampleEnrichment(sam, format, assembly, elderVf, youngerVf); edwQaPairSampleOverlapSaveToDb(conn, sam, "edwQaPairSampleOverlap", 128); freez(&sam); } genomeRangeTreeFree(&targetGrt); correlateFree(&c); bigBedFileClose(&youngerBbi); bigBedFileClose(&elderBbi); freez(&youngerPath); freez(&elderPath); }