void doBedReplicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf)
/* Compare two replicates, elder and younger, using the complete bed file of
 * each one.  Both files are read in as bed3 lists and the pair of lists is
 * handed to doBed3Replicate.  Nothing is done when pairExists already finds
 * this pair in cdwQaPairSampleOverlap. */
{
if (!pairExists(conn, elderEf->id, youngerEf->id, "cdwQaPairSampleOverlap"))
    {
    /* Look up where each file lives. */
    char *elderFile = cdwPathForFileId(conn, elderEf->id);
    char *youngerFile = cdwPathForFileId(conn, youngerEf->id);

    /* Read both files as bed3 and run the shared replicate analysis. */
    struct bed3 *elderBeds = bed3LoadAll(elderFile);
    struct bed3 *youngerBeds = bed3LoadAll(youngerFile);
    doBed3Replicate(conn, format, assembly, elderEf, elderVf, elderBeds,
        youngerEf, youngerVf, youngerBeds);

    /* Release the lists and the path strings. */
    bed3FreeList(&elderBeds);
    bed3FreeList(&youngerBeds);
    freez(&youngerFile);
    freez(&elderFile);
    }
}
void doSampleReplicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf)
/* Compare two replicates, elder and younger, using the bed3 sample that
 * cdwLoadSampleBed3 returns for each valid-file record.  The two lists are
 * handed to doBed3Replicate.  Nothing is done when pairExists already finds
 * this pair in cdwQaPairSampleOverlap. */
{
if (!pairExists(conn, elderEf->id, youngerEf->id, "cdwQaPairSampleOverlap"))
    {
    struct bed3 *elderSample = cdwLoadSampleBed3(conn, elderVf);
    struct bed3 *youngerSample = cdwLoadSampleBed3(conn, youngerVf);

    doBed3Replicate(conn, format, assembly, elderEf, elderVf, elderSample,
        youngerEf, youngerVf, youngerSample);

    bed3FreeList(&elderSample);
    bed3FreeList(&youngerSample);
    }
}
void doEnrichmentsFromBed(struct sqlConnection *conn,
    struct cdwFile *ef, struct cdwValidFile *vf,
    struct cdwAssembly *assembly, struct target *targetList)
/* Compute enrichments for a bed file: find the file's path from its id,
 * load the whole file as a bed3 list, and pass that list on to
 * doEnrichmentsFromBed3Sample. */
{
char *path = cdwPathForFileId(conn, ef->id);
struct bed3 *bedList = bed3LoadAll(path);

doEnrichmentsFromBed3Sample(bedList, conn, ef, vf, assembly, targetList);

/* The list and the path string are both ours to release. */
bed3FreeList(&bedList);
freez(&path);
}
void doSampleReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly, struct edwFile *elderEf, struct edwValidFile *elderVf, struct edwFile *youngerEf, struct edwValidFile *youngerVf) /* Do correlation analysis between elder and younger and save result to * a new edwQaPairSampleOverlap record. Do this for a format where we have a bed3 sample file. */ { if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap")) return; struct edwQaPairSampleOverlap *sam; AllocVar(sam); sam->elderFileId = elderVf->fileId; sam->youngerFileId = youngerVf->fileId; sam->elderSampleBases = elderVf->basesInSample; sam->youngerSampleBases = youngerVf->basesInSample; /* Load up elder into genome range tree. */ struct bed3 *elderBedList = edwLoadSampleBed3(conn, elderVf); struct genomeRangeTree *elderGrt = edwMakeGrtFromBed3List(elderBedList); /* Load up younger as bed, and loop through to get overlap */ long long totalOverlap = 0; struct bed3 *bed, *youngerBedList = edwLoadSampleBed3(conn, youngerVf); for (bed = youngerBedList; bed != NULL; bed = bed->next) { int overlap = genomeRangeTreeOverlapSize(elderGrt, bed->chrom, bed->chromStart, bed->chromEnd); totalOverlap += overlap; } sam->sampleOverlapBases = totalOverlap; setSampleSampleEnrichment(sam, format, assembly, elderVf, youngerVf); /* Save to database, clean up, go home. */ edwQaPairSampleOverlapSaveToDb(conn, sam, "edwQaPairSampleOverlap", 128); freez(&sam); genomeRangeTreeFree(&elderGrt); bed3FreeList(&elderBedList); bed3FreeList(&youngerBedList); }
void doEnrichmentsFromSampleBed(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf, struct cdwAssembly *assembly, struct target *targetList) /* Figure out enrichments from sample bed file. */ { char *sampleBed = vf->sampleBed; if (isEmpty(sampleBed)) { warn("No sample bed for %s", ef->cdwFileName); return; } /* Load sample bed, make a range tree to track unique coverage, and get list of all chroms .*/ struct bed3 *sampleList = bed3LoadAll(sampleBed); if (sampleList == NULL) { warn("Sample bed is empty for %s", ef->cdwFileName); return; } doEnrichmentsFromBed3Sample(sampleList, conn, ef, vf, assembly, targetList); bed3FreeList(&sampleList); }
void doEnrichmentsFromSampleBed(struct sqlConnection *conn, struct edwFile *ef, struct edwValidFile *vf, struct edwAssembly *assembly, struct target *targetList) /* Figure out enrichments from sample bed file. */ { char *sampleBed = vf->sampleBed; if (isEmpty(sampleBed)) { warn("No sample bed for %s", ef->edwFileName); return; } /* Load sample bed, make a range tree to track unique coverage, and get list of all chroms .*/ struct bed3 *sample, *sampleList = bed3LoadAll(sampleBed); if (sampleList == NULL) { warn("Sample bed is empty for %s", ef->edwFileName); return; } struct genomeRangeTree *sampleGrt = edwMakeGrtFromBed3List(sampleList); struct hashEl *chrom, *chromList = hashElListHash(sampleGrt->hash); /* Iterate through each target - and in lockstep each associated grt to calculate unique overlap */ struct target *target; for (target = targetList; target != NULL; target = target->next) { if (target->skip) continue; struct genomeRangeTree *grt = target->grt; long long uniqOverlapBases = 0; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { struct rbTree *sampleTree = chrom->val; struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name); if (targetTree != NULL) { struct range *range, *rangeList = rangeTreeList(sampleTree); for (range = rangeList; range != NULL; range = range->next) { /* Do unique base overlap counts (since using range trees both sides) */ int overlap = rangeTreeOverlapSize(targetTree, range->start, range->end); uniqOverlapBases += overlap; } } } /* Figure out how much we overlap allowing same bases in genome * to part of more than one overlap. */ long long overlapBases = 0; for (sample = sampleList; sample != NULL; sample = sample->next) { int overlap = genomeRangeTreeOverlapSize(grt, sample->chrom, sample->chromStart, sample->chromEnd); overlapBases += overlap; } /* Save to database. 
*/ struct edwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, overlapBases, uniqOverlapBases); edwQaEnrichSaveToDb(conn, enrich, "edwQaEnrich", 128); edwQaEnrichFree(&enrich); } genomeRangeTreeFree(&sampleGrt); bed3FreeList(&sampleList); hashElFreeList(&chromList); }