Exemplo n.º 1
0
void doBedReplicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf)
/* Run the replicate comparison between elder and younger using the contents of
 * their bed-format files.  Each file is read in as a bed3 list and the two lists,
 * together with all of the caller's arguments, are handed to doBed3Replicate().
 * Returns without doing anything when pairExists() reports the elder/younger pair
 * for the cdwQaPairSampleOverlap table. */
{
/* Skip pairs that have been handled before. */
if (pairExists(conn, elderEf->id, youngerEf->id, "cdwQaPairSampleOverlap"))
    return;

/* Get the path for each file from its id. */
char *pathOfElder = cdwPathForFileId(conn, elderEf->id);
char *pathOfYounger = cdwPathForFileId(conn, youngerEf->id);

/* Read both files as bed3 and pass the lists on to the shared bed3 code. */
struct bed3 *elderItems = bed3LoadAll(pathOfElder);
struct bed3 *youngerItems = bed3LoadAll(pathOfYounger);
doBed3Replicate(conn, format, assembly,
    elderEf, elderVf, elderItems,
    youngerEf, youngerVf, youngerItems);

/* Release the paths and the lists. */
freez(&pathOfElder);
freez(&pathOfYounger);
bed3FreeList(&elderItems);
bed3FreeList(&youngerItems);
}
Exemplo n.º 2
0
void doSampleReplicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf)
/* Run the replicate comparison between elder and younger using their bed3 samples.
 * The samples are loaded with cdwLoadSampleBed3() and handed to doBed3Replicate()
 * along with all of the caller's arguments.  Does nothing when pairExists() reports
 * the elder/younger pair for the cdwQaPairSampleOverlap table. */
{
if (!pairExists(conn, elderEf->id, youngerEf->id, "cdwQaPairSampleOverlap"))
    {
    /* Load the sample for each side, elder first. */
    struct bed3 *elderSample = cdwLoadSampleBed3(conn, elderVf);
    struct bed3 *youngerSample = cdwLoadSampleBed3(conn, youngerVf);

    doBed3Replicate(conn, format, assembly,
        elderEf, elderVf, elderSample,
        youngerEf, youngerVf, youngerSample);

    /* The lists are ours to free once the comparison is done. */
    bed3FreeList(&elderSample);
    bed3FreeList(&youngerSample);
    }
}
Exemplo n.º 3
0
void doEnrichmentsFromBed(struct sqlConnection *conn,
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Calculate enrichments for a bed file: get the path for ef's id, read the whole
 * file as a bed3 list, and pass that list to doEnrichmentsFromBed3Sample(). */
{
char *path = cdwPathForFileId(conn, ef->id);
struct bed3 *bedItems = bed3LoadAll(path);

doEnrichmentsFromBed3Sample(bedItems, conn, ef, vf, assembly, targetList);

/* Release the path string and the loaded list. */
freez(&path);
bed3FreeList(&bedItems);
}
Exemplo n.º 4
0
void doSampleReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Measure how much the younger bed3 sample overlaps the elder one and store the
 * result as an edwQaPairSampleOverlap record.  Does nothing when pairExists()
 * reports the elder/younger pair for the edwQaPairSampleOverlap table. */
{
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap"))
    return;

/* Start the record with the ids and sample sizes taken from the two valid files. */
struct edwQaPairSampleOverlap *pair;
AllocVar(pair);
pair->elderFileId = elderVf->fileId;
pair->elderSampleBases = elderVf->basesInSample;
pair->youngerFileId = youngerVf->fileId;
pair->youngerSampleBases = youngerVf->basesInSample;

/* Elder sample goes into a genome range tree so it can be queried by position. */
struct bed3 *elderItems = edwLoadSampleBed3(conn, elderVf);
struct genomeRangeTree *elderRanges = edwMakeGrtFromBed3List(elderItems);

/* Younger sample stays a list; add up the overlap of each item with the elder tree. */
struct bed3 *youngerItems = edwLoadSampleBed3(conn, youngerVf);
long long overlapSum = 0;
struct bed3 *item = youngerItems;
while (item != NULL)
    {
    int size = genomeRangeTreeOverlapSize(elderRanges,
        item->chrom, item->chromStart, item->chromEnd);
    overlapSum += size;
    item = item->next;
    }
pair->sampleOverlapBases = overlapSum;
setSampleSampleEnrichment(pair, format, assembly, elderVf, youngerVf);

/* Write the record out, then release everything allocated here. */
edwQaPairSampleOverlapSaveToDb(conn, pair, "edwQaPairSampleOverlap", 128);
genomeRangeTreeFree(&elderRanges);
bed3FreeList(&elderItems);
bed3FreeList(&youngerItems);
freez(&pair);
}
Exemplo n.º 5
0
void doEnrichmentsFromSampleBed(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Calculate enrichments from the sample bed file named in vf->sampleBed.  Warns and
 * returns if that name is empty or if the file loads as an empty list; otherwise
 * the loaded list is passed to doEnrichmentsFromBed3Sample(). */
{
char *bedFile = vf->sampleBed;
if (isEmpty(bedFile))
    {
    warn("No sample bed for %s", ef->cdwFileName);
    return;
    }

/* Read the sample and only go on if it has at least one item. */
struct bed3 *bedItems = bed3LoadAll(bedFile);
if (bedItems != NULL)
    {
    doEnrichmentsFromBed3Sample(bedItems, conn, ef, vf, assembly, targetList);
    bed3FreeList(&bedItems);
    }
else
    {
    warn("Sample bed is empty for %s", ef->cdwFileName);
    }
}
Exemplo n.º 6
0
void doEnrichmentsFromSampleBed(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf, 
    struct edwAssembly *assembly, struct target *targetList)
/* Calculate enrichments from the sample bed file named in vf->sampleBed.  Warns and
 * returns if that name is empty or the file loads as an empty list.  Otherwise, for
 * every target not marked skip, two totals are computed against the target's genome
 * range tree - one from the sample's own range trees and one from the raw sample
 * items - and an edwQaEnrich record built from them by enrichFromOverlaps() is 
 * passed to edwQaEnrichSaveToDb(). */
{
char *bedFile = vf->sampleBed;
if (isEmpty(bedFile))
    {
    warn("No sample bed for %s", ef->edwFileName);
    return;
    }

/* Read the sample; an empty list means there is nothing to measure. */
struct bed3 *itemList = bed3LoadAll(bedFile);
if (itemList == NULL)
    {
    warn("Sample bed is empty for %s", ef->edwFileName);
    return;
    }

/* Build a genome range tree from the sample and list the entries of its hash,
 * which are used below as name/tree pairs, one per chromosome. */
struct genomeRangeTree *itemGrt = edwMakeGrtFromBed3List(itemList);
struct hashEl *chromEls = hashElListHash(itemGrt->hash);

struct target *tg;
for (tg = targetList; tg != NULL; tg = tg->next)
    {
    if (tg->skip)
        continue;
    struct genomeRangeTree *targetGrt = tg->grt;

    /* First total: walk the ranges stored in the sample's tree for each chromosome
     * and add up their overlap with the target's tree for the same chromosome. */
    long long uniqTotal = 0;
    struct hashEl *el;
    for (el = chromEls; el != NULL; el = el->next)
        {
        struct rbTree *itemTree = el->val;
        struct rbTree *targetTree = genomeRangeTreeFindRangeTree(targetGrt, el->name);
        if (targetTree == NULL)
            continue;    /* Target has nothing on this chromosome. */
        struct range *r = rangeTreeList(itemTree);
        while (r != NULL)
            {
            int size = rangeTreeOverlapSize(targetTree, r->start, r->end);
            uniqTotal += size;
            r = r->next;
            }
        }

    /* Second total: go through the sample items one at a time, so bases covered
     * by more than one item are counted once per item. */
    long long allTotal = 0;
    struct bed3 *item = itemList;
    while (item != NULL)
        {
        int size = genomeRangeTreeOverlapSize(targetGrt,
            item->chrom, item->chromStart, item->chromEnd);
        allTotal += size;
        item = item->next;
        }

    /* Turn the two totals into a record, save it, and free it. */
    struct edwQaEnrich *rec = enrichFromOverlaps(ef, vf, assembly,
        tg, allTotal, uniqTotal);
    edwQaEnrichSaveToDb(conn, rec, "edwQaEnrich", 128);
    edwQaEnrichFree(&rec);
    }

/* Clean up what was allocated for the sample. */
genomeRangeTreeFree(&itemGrt);
bed3FreeList(&itemList);
hashElFreeList(&chromEls);
}