Esempio n. 1
0
void doBed3Replicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf, struct bed3 *elderBedList,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf, struct bed3 *youngerBedList)
/* Compare a pair of bed3 samples.  Totals how many bases of the younger list
 * overlap the elder list, hands the record to setSampleSampleEnrichment, and
 * writes it to the cdwQaPairSampleOverlap table.  The elderEf and youngerEf
 * arguments are not read by this function. */
{
/* Index the elder items in a genome range tree so they can be queried by position. */
struct genomeRangeTree *elderRanges = cdwMakeGrtFromBed3List(elderBedList);

/* Walk the younger items, accumulating each one's overlap with the elder ranges. */
long long overlapSum = 0;
struct bed3 *item = youngerBedList;
while (item != NULL)
    {
    int hit = genomeRangeTreeOverlapSize(elderRanges,
        item->chrom, item->chromStart, item->chromEnd);
    overlapSum += hit;
    item = item->next;
    }

/* Build the result record from the two valid-file entries and the overlap total. */
struct cdwQaPairSampleOverlap *pair;
AllocVar(pair);
pair->elderFileId = elderVf->fileId;
pair->youngerFileId = youngerVf->fileId;
pair->elderSampleBases = elderVf->basesInSample;
pair->youngerSampleBases = youngerVf->basesInSample;
pair->sampleOverlapBases = overlapSum;
setSampleSampleEnrichment(pair, format, assembly, elderVf, youngerVf);

/* Store the record, then release what was allocated here. */
cdwQaPairSampleOverlapSaveToDb(conn, pair, "cdwQaPairSampleOverlap", 128);
freez(&pair);
genomeRangeTreeFree(&elderRanges);
}
Esempio n. 2
0
long getAnnotatedNonGapBases(struct genomeRangeTree* ranges, struct bed* antigaps)
/* Return the sum, over every item of the antigaps list, of the value that
 * genomeRangeTreeOverlapSize reports for that item's chrom/start/end against
 * ranges.  An empty (NULL) list yields 0. */
{
	long total = 0;
	struct bed *item = antigaps;
	while (item != NULL) {
		total += genomeRangeTreeOverlapSize(ranges, item->chrom, item->chromStart, item->chromEnd);
		item = item->next;
	}
	return total;
}
Esempio n. 3
0
void doEnrichmentsFromBed3Sample(struct bed3 *sampleList,
    struct sqlConnection *conn,
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* For every target in targetList not flagged skip, measure how the bed3 sample
 * overlaps the target's genome range tree and save a cdwQaEnrich record for it.
 * Each record gets two totals: one summed over the ranges held in the sample's
 * range trees, and one summed over the sample items taken one at a time. */
{
/* Put the sample into per-chromosome range trees and list those chromosomes. */
struct genomeRangeTree *sampleGrt = cdwMakeGrtFromBed3List(sampleList);
struct hashEl *chromList = hashElListHash(sampleGrt->hash);

struct target *tg = targetList;
while (tg != NULL)
    {
    if (!tg->skip)
        {
        struct genomeRangeTree *targetGrt = tg->grt;

        /* Unique-base total: range trees on both sides, one chromosome at a time.
         * Chromosomes the target has no tree for contribute nothing. */
        long long uniqTotal = 0;
        struct hashEl *chromEl;
        for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next)
            {
            struct rbTree *targetTree = genomeRangeTreeFindRangeTree(targetGrt, chromEl->name);
            if (targetTree == NULL)
                continue;
            struct rbTree *sampleTree = chromEl->val;
            struct range *r;
            for (r = rangeTreeList(sampleTree); r != NULL; r = r->next)
                {
                int hit = rangeTreeOverlapSize(targetTree, r->start, r->end);
                uniqTotal += hit;
                }
            }

        /* Per-item total: a genome base may be counted once for each sample
         * item that covers it. */
        long long itemTotal = 0;
        struct bed3 *item = sampleList;
        while (item != NULL)
            {
            int hit = genomeRangeTreeOverlapSize(targetGrt,
                item->chrom, item->chromStart, item->chromEnd);
            itemTotal += hit;
            item = item->next;
            }

        /* Turn the two totals into a record, store it, and free it. */
        struct cdwQaEnrich *record = enrichFromOverlaps(ef, vf, assembly,
            tg, itemTotal, uniqTotal);
        cdwQaEnrichSaveToDb(conn, record, "cdwQaEnrich", 128);
        cdwQaEnrichFree(&record);
        }
    tg = tg->next;
    }

/* Release the structures built at the top of the function. */
genomeRangeTreeFree(&sampleGrt);
hashElFreeList(&chromList);
}
Esempio n. 4
0
void outputOverlappingGrt(struct bed *chiaList, struct genomeRangeTree *grt,
	double *out1, double *out2)
/* For the item at position i of chiaList, store in out1[i] the overlap of its
 * first block with grt divided by that block's size, and in out2[i] the same
 * ratio for its second block.  Both arrays need one slot per list item, and
 * every item is read as having at least two blocks. */
{
struct bed *chia = chiaList;
int ix;
for (ix = 0; chia != NULL; ++ix, chia = chia->next)
    {
    /* First block: taken to begin at chromStart; chromStarts[0] is not consulted. */
    int firstSize = chia->blockSizes[0];
    int firstStart = chia->chromStart;
    int firstEnd = firstStart + firstSize;
    int firstHit = genomeRangeTreeOverlapSize(grt, chia->chrom, firstStart, firstEnd);
    out1[ix] = (double)firstHit/firstSize;

    /* Second block: begins at chromStart plus its chromStarts offset. */
    int secondSize = chia->blockSizes[1];
    int secondStart = chia->chromStart + chia->chromStarts[1];
    int secondEnd = secondStart + secondSize;
    int secondHit = genomeRangeTreeOverlapSize(grt, chia->chrom, secondStart, secondEnd);
    out2[ix] = (double)secondHit/secondSize;
    }
}
Esempio n. 5
0
void doEnrichmentsFromSampleBed(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf, 
    struct edwAssembly *assembly, struct target *targetList)
/* Load the sample bed named by vf->sampleBed and, for every target in targetList
 * not flagged skip, save an edwQaEnrich record describing how the sample overlaps
 * the target's genome range tree.  Warns and returns without saving anything when
 * there is no sample bed name or the loaded list is empty. */
{
/* Nothing to do without a sample bed file name. */
if (isEmpty(vf->sampleBed))
    {
    warn("No sample bed for %s", ef->edwFileName);
    return;
    }

/* Read the sample items; an empty file is reported and skipped. */
struct bed3 *sampleList = bed3LoadAll(vf->sampleBed);
if (sampleList == NULL)
    {
    warn("Sample bed is empty for %s", ef->edwFileName);
    return;
    }

/* Put the sample into per-chromosome range trees and list those chromosomes. */
struct genomeRangeTree *sampleGrt = edwMakeGrtFromBed3List(sampleList);
struct hashEl *chromList = hashElListHash(sampleGrt->hash);

struct target *tg = targetList;
while (tg != NULL)
    {
    if (!tg->skip)
        {
        struct genomeRangeTree *targetGrt = tg->grt;

        /* Unique-base total: range trees on both sides, one chromosome at a time.
         * Chromosomes the target has no tree for contribute nothing. */
        long long uniqTotal = 0;
        struct hashEl *chromEl;
        for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next)
            {
            struct rbTree *targetTree = genomeRangeTreeFindRangeTree(targetGrt, chromEl->name);
            if (targetTree == NULL)
                continue;
            struct rbTree *sampleTree = chromEl->val;
            struct range *r;
            for (r = rangeTreeList(sampleTree); r != NULL; r = r->next)
                {
                int hit = rangeTreeOverlapSize(targetTree, r->start, r->end);
                uniqTotal += hit;
                }
            }

        /* Per-item total: a genome base may be counted once for each sample
         * item that covers it. */
        long long itemTotal = 0;
        struct bed3 *item = sampleList;
        while (item != NULL)
            {
            int hit = genomeRangeTreeOverlapSize(targetGrt,
                item->chrom, item->chromStart, item->chromEnd);
            itemTotal += hit;
            item = item->next;
            }

        /* Turn the two totals into a record, store it, and free it. */
        struct edwQaEnrich *record = enrichFromOverlaps(ef, vf, assembly,
            tg, itemTotal, uniqTotal);
        edwQaEnrichSaveToDb(conn, record, "edwQaEnrich", 128);
        edwQaEnrichFree(&record);
        }
    tg = tg->next;
    }

/* Release everything loaded or built above. */
genomeRangeTreeFree(&sampleGrt);
bed3FreeList(&sampleList);
hashElFreeList(&chromList);
}