void doBedReplicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf)
/* Compare elder and younger as replicates for a format whose files are read
 * with bed3LoadAll from each file's path.  The comparison itself, and whatever
 * record it saves, is delegated to doBed3Replicate.  Does nothing when
 * pairExists reports the pair in cdwQaPairSampleOverlap. */
{
if (!pairExists(conn, elderEf->id, youngerEf->id, "cdwQaPairSampleOverlap"))
    {
    /* Look up the path of each file. */
    char *elderFileName = cdwPathForFileId(conn, elderEf->id);
    char *youngerFileName = cdwPathForFileId(conn, youngerEf->id);

    /* Read both files as bed3 lists and hand them to the shared bed3 routine. */
    struct bed3 *elderItems = bed3LoadAll(elderFileName);
    struct bed3 *youngerItems = bed3LoadAll(youngerFileName);
    doBed3Replicate(conn, format, assembly, elderEf, elderVf, elderItems,
        youngerEf, youngerVf, youngerItems);

    /* Release the lists and the path strings. */
    bed3FreeList(&elderItems);
    bed3FreeList(&youngerItems);
    freez(&youngerFileName);
    freez(&elderFileName);
    }
}
void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly, struct edwFile *elderEf, struct edwValidFile *elderVf, struct edwFile *youngerEf, struct edwValidFile *youngerVf) /* Do correlation analysis between elder and younger and save result to * a new edwQaPairCorrelation record. Do this for a format where we have a bigWig file. */ { if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation")) return; char *enrichedIn = elderVf->enrichedIn; if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown")) { struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn); /* Get open big wig files for both younger and older. */ char *elderPath = edwPathForFileId(conn, elderEf->id); char *youngerPath = edwPathForFileId(conn, youngerEf->id); struct bbiFile *elderBbi = bigWigFileOpen(elderPath); struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath); /* Figure out thresholds */ double elderThreshold = twoStdsOverMean(elderBbi); double youngerThreshold = twoStdsOverMean(youngerBbi); /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/ struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi); struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew(); struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew(); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi, elderThreshold, youngerThreshold, c, cInEnriched, cClipped); } /* Make up correlation structure . 
*/ struct edwQaPairCorrelation *cor; AllocVar(cor); cor->elderFileId = elderVf->fileId; cor->youngerFileId = youngerVf->fileId; cor->pearsonOverall = correlateResult(c); cor->pearsonInEnriched = correlateResult(cInEnriched); cor->pearsonClipped = correlateResult(cClipped); edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128); bigWigValsOnChromFree(&bVals); bigWigValsOnChromFree(&aVals); genomeRangeTreeFree(&targetGrt); freez(&cor); correlateFree(&c); bigWigFileClose(&youngerBbi); bigWigFileClose(&elderBbi); freez(&youngerPath); freez(&elderPath); } }
void doSampleReplicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf)
/* Compare elder and younger as replicates for a format where we have a bed3
 * sample file.  The samples are loaded with cdwLoadSampleBed3 and the analysis,
 * including saving, is delegated to doBed3Replicate.  Does nothing when
 * pairExists reports the pair in cdwQaPairSampleOverlap. */
{
if (!pairExists(conn, elderEf->id, youngerEf->id, "cdwQaPairSampleOverlap"))
    {
    /* Load the bed3 sample for each side. */
    struct bed3 *elderSample = cdwLoadSampleBed3(conn, elderVf);
    struct bed3 *youngerSample = cdwLoadSampleBed3(conn, youngerVf);

    doBed3Replicate(conn, format, assembly, elderEf, elderVf, elderSample,
        youngerEf, youngerVf, youngerSample);

    /* Release both sample lists. */
    bed3FreeList(&elderSample);
    bed3FreeList(&youngerSample);
    }
}
void doSampleReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly, struct edwFile *elderEf, struct edwValidFile *elderVf, struct edwFile *youngerEf, struct edwValidFile *youngerVf) /* Do correlation analysis between elder and younger and save result to * a new edwQaPairSampleOverlap record. Do this for a format where we have a bed3 sample file. */ { if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap")) return; struct edwQaPairSampleOverlap *sam; AllocVar(sam); sam->elderFileId = elderVf->fileId; sam->youngerFileId = youngerVf->fileId; sam->elderSampleBases = elderVf->basesInSample; sam->youngerSampleBases = youngerVf->basesInSample; /* Load up elder into genome range tree. */ struct bed3 *elderBedList = edwLoadSampleBed3(conn, elderVf); struct genomeRangeTree *elderGrt = edwMakeGrtFromBed3List(elderBedList); /* Load up younger as bed, and loop through to get overlap */ long long totalOverlap = 0; struct bed3 *bed, *youngerBedList = edwLoadSampleBed3(conn, youngerVf); for (bed = youngerBedList; bed != NULL; bed = bed->next) { int overlap = genomeRangeTreeOverlapSize(elderGrt, bed->chrom, bed->chromStart, bed->chromEnd); totalOverlap += overlap; } sam->sampleOverlapBases = totalOverlap; setSampleSampleEnrichment(sam, format, assembly, elderVf, youngerVf); /* Save to database, clean up, go home. */ edwQaPairSampleOverlapSaveToDb(conn, sam, "edwQaPairSampleOverlap", 128); freez(&sam); genomeRangeTreeFree(&elderGrt); bed3FreeList(&elderBedList); bed3FreeList(&youngerBedList); }
void doBigBedReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly, struct edwFile *elderEf, struct edwValidFile *elderVf, struct edwFile *youngerEf, struct edwValidFile *youngerVf) /* Do correlation analysis between elder and younger and save result to * a new edwQaPairCorrelation record. Do this for a format where we have a bigBed file. */ { /* If got both pairs, work is done already */ if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap") && pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation")) return; int numColIx = 0; if (sameString(format, "narrowPeak") || sameString(format, "broadPeak")) numColIx = 6; // signalVal else numColIx = 4; // score numColIx -= 3; // Subtract off chrom/start/end char *enrichedIn = elderVf->enrichedIn; struct genomeRangeTree *targetGrt = NULL; if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown")) targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn); /* Get open big bed files for both younger and older. */ char *elderPath = edwPathForFileId(conn, elderEf->id); char *youngerPath = edwPathForFileId(conn, youngerEf->id); struct bbiFile *elderBbi = bigBedFileOpen(elderPath); struct bbiFile *youngerBbi = bigBedFileOpen(youngerPath); /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/ struct correlate *c = correlateNew(), *cInEnriched = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi); long long elderTotalSpan = 0, youngerTotalSpan = 0, overlapTotalSpan = 0; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBbCorrelations(chrom, targetGrt, elderBbi, youngerBbi, numColIx, c, cInEnriched, &elderTotalSpan, &youngerTotalSpan, &overlapTotalSpan); } /* Make up correlation structure and save. 
*/ if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation")) { struct edwQaPairCorrelation *cor; AllocVar(cor); cor->elderFileId = elderVf->fileId; cor->youngerFileId = youngerVf->fileId; cor->pearsonOverall = correlateResult(c); cor->pearsonInEnriched = correlateResult(cInEnriched); edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128); freez(&cor); } /* Also make up sample structure and save. */ if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap")) { struct edwQaPairSampleOverlap *sam; AllocVar(sam); sam->elderFileId = elderVf->fileId; sam->youngerFileId = youngerVf->fileId; sam->elderSampleBases = elderTotalSpan; sam->youngerSampleBases = youngerTotalSpan; sam->sampleOverlapBases = overlapTotalSpan; setSampleSampleEnrichment(sam, format, assembly, elderVf, youngerVf); edwQaPairSampleOverlapSaveToDb(conn, sam, "edwQaPairSampleOverlap", 128); freez(&sam); } genomeRangeTreeFree(&targetGrt); correlateFree(&c); bigBedFileClose(&youngerBbi); bigBedFileClose(&elderBbi); freez(&youngerPath); freez(&elderPath); }