Пример #1
0
void printEnrichment(struct sqlConnection *conn, long long fileId, char *target, FILE *f)
/* Get enrichment for file and target and print it if possible. */
{
struct edwQaEnrich *enrich = edwQaEnrichFind(conn, fileId, target);
if (enrich != NULL)
    {
    fprintf(f, "<TD>%4.2fx</TD>\n", enrich->enrichment);
    fprintf(f, "<TD>%4.3f</TD>\n", enrich->coverage);
    }
else
    {
    fprintf(f, "<TD>n/a</TD>\n");
    fprintf(f, "<TD>n/a</TD>\n");
    }
edwQaEnrichFree(&enrich);
}
Пример #2
0
void doEnrichmentsFromSampleBed(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf, 
    struct edwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from sample bed file. */
{
char *sampleBed = vf->sampleBed;
if (isEmpty(sampleBed))
    {
    warn("No sample bed for %s", ef->edwFileName);
    return;
    }

/* Load sample bed, make a range tree to track unique coverage, and get list of all chroms .*/
struct bed3 *sample, *sampleList = bed3LoadAll(sampleBed);
if (sampleList == NULL)
    {
    warn("Sample bed is empty for %s", ef->edwFileName);
    return;
    }
struct genomeRangeTree *sampleGrt = edwMakeGrtFromBed3List(sampleList);
struct hashEl *chrom, *chromList = hashElListHash(sampleGrt->hash);

/* Iterate through each target - and in lockstep each associated grt to calculate unique overlap */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
        continue;
    struct genomeRangeTree *grt = target->grt;
    long long uniqOverlapBases = 0;
    for (chrom = chromList; chrom != NULL; chrom = chrom->next)
        {
	struct rbTree *sampleTree = chrom->val;
	struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	if (targetTree != NULL)
	    {
	    struct range *range, *rangeList = rangeTreeList(sampleTree);
	    for (range = rangeList; range != NULL; range = range->next)
		{
		/* Do unique base overlap counts (since using range trees both sides) */
		int overlap = rangeTreeOverlapSize(targetTree, range->start, range->end);
		uniqOverlapBases += overlap;
		}
	    }
	}

    /* Figure out how much we overlap allowing same bases in genome
     * to part of more than one overlap. */ 
    long long overlapBases = 0;
    for (sample = sampleList; sample != NULL; sample = sample->next)
        {
	int overlap = genomeRangeTreeOverlapSize(grt, 
	    sample->chrom, sample->chromStart, sample->chromEnd);
	overlapBases += overlap;
	}

    /* Save to database. */
    struct edwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly,
	target, overlapBases, uniqOverlapBases);
    edwQaEnrichSaveToDb(conn, enrich, "edwQaEnrich", 128);
    edwQaEnrichFree(&enrich);
    }
genomeRangeTreeFree(&sampleGrt);
bed3FreeList(&sampleList);
hashElFreeList(&chromList);
}
Пример #3
0
/* This old way is ~3 times as slow */
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf, 
    struct edwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigBed file. */
{
/* Get path to bigBed, open it, and read all chromosomes. */
char *bigWigPath = edwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigWig data in memory. Also just for performance we do a lookup of target range tree to
 * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    /* Get list of intervals in bigWig for this chromosome, and feed it to a rangeTree. */
    struct lm *lm = lmInit(0);
    long startBigQueryTime = clock1000();
    struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom->name, 0, chrom->size, lm);
    long endBigQueryTime = clock1000();
    totalBigQueryTime += endBigQueryTime - startBigQueryTime;
    struct bbiInterval *iv;

    /* Loop through all targets adding overlaps from ivList */
    long startOverlapTime = clock1000();
    struct target *target;
    for (target = targetList; target != NULL; target = target->next)
        {
	struct genomeRangeTree *grt = target->grt;
	struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	if (targetTree != NULL)
	    {
	    for (iv = ivList; iv != NULL; iv = iv->next)
		{
		int overlap = rangeTreeOverlapSize(targetTree, iv->start, iv->end);
		target->uniqOverlapBases += overlap;
		target->overlapBases += overlap * iv->val;
		}
	    }
	}
    long endOverlapTime = clock1000();
    totalOverlapTime += endOverlapTime - startOverlapTime;
    lmCleanup(&lm);
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    struct edwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    edwQaEnrichSaveToDb(conn, enrich, "edwQaEnrich", 128);
    edwQaEnrichFree(&enrich);
    }

bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}
Пример #4
0
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf, 
    struct edwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigBed file. */
{
/* Get path to bigBed, open it, and read all chromosomes. */
char *bigWigPath = edwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
struct bigWigValsOnChrom *valsOnChrom = bigWigValsOnChromNew();

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigWig data in memory. Also just for performance we do a lookup of target range tree to
 * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    long startBigQueryTime = clock1000();
    boolean gotData = bigWigValsOnChromFetchData(valsOnChrom, chrom->name, bbi);
    long endBigQueryTime = clock1000();
    totalBigQueryTime += endBigQueryTime - startBigQueryTime;
    if (gotData)
	{
	double *valBuf = valsOnChrom->valBuf;
	Bits *covBuf = valsOnChrom->covBuf;

	/* Loop through all targets adding overlaps from ivList */
	long startOverlapTime = clock1000();
	struct target *target;
	for (target = targetList; target != NULL; target = target->next)
	    {
	    if (target->skip)
		continue;
	    struct genomeRangeTree *grt = target->grt;
	    struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	    if (targetTree != NULL)
		{
		struct range *range, *rangeList = rangeTreeList(targetTree);
		for (range = rangeList; range != NULL; range = range->next)
		    {
		    int s = range->start, e = range->end, i;
		    for (i=s; i<=e; ++i)
		        {
			if (bitReadOne(covBuf, i))
			    {
			    double x = valBuf[i];
			    target->uniqOverlapBases += 1;
			    target->overlapBases += x;
			    }
			}
		    }
		}
	    }
	long endOverlapTime = clock1000();
	totalOverlapTime += endOverlapTime - startOverlapTime;
	}
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct edwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    edwQaEnrichSaveToDb(conn, enrich, "edwQaEnrich", 128);
    edwQaEnrichFree(&enrich);
    }

bigWigValsOnChromFree(&valsOnChrom);
bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}
Пример #5
0
void doEnrichmentsFromBigBed(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf, 
    struct edwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigBed file. */
{
/* Get path to bigBed, open it, and read all chromosomes. */
char *bigBedPath = edwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigBedFileOpen(bigBedPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigBed data in memory. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    /* Get list of intervals in bigBed for this chromosome, and feed it to a rangeTree. */
    struct lm *lm = lmInit(0);
    struct bigBedInterval *ivList = bigBedIntervalQuery(bbi, chrom->name, 0, chrom->size, 0, lm);
    struct bigBedInterval *iv;
    struct rbTree *bbTree = rangeTreeNew();
    for (iv = ivList; iv != NULL; iv = iv->next)
	 rangeTreeAdd(bbTree, iv->start, iv->end);
    struct range *bbRange, *bbRangeList = rangeTreeList(bbTree);

    /* Loop through all targets adding overlaps from ivList and unique overlaps from bbRangeList */
    struct target *target;
    for (target = targetList; target != NULL; target = target->next)
        {
	if (target->skip)
	    continue;
	struct genomeRangeTree *grt = target->grt;
	struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	if (targetTree != NULL)
	    {
	    struct bigBedInterval *iv;
	    for (iv = ivList; iv != NULL; iv = iv->next)
		{
		int overlap = rangeTreeOverlapSize(targetTree, iv->start, iv->end);
		target->overlapBases += overlap;
		}
	    for (bbRange = bbRangeList; bbRange != NULL; bbRange = bbRange->next)
		{
		int overlap = rangeTreeOverlapSize(targetTree, bbRange->start, bbRange->end);
		target->uniqOverlapBases += overlap;
		}
	    }
	}
    rangeTreeFree(&bbTree);
    lmCleanup(&lm);
    }

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct edwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    edwQaEnrichSaveToDb(conn, enrich, "edwQaEnrich", 128);
    edwQaEnrichFree(&enrich);
    }

bbiChromInfoFreeList(&chromList);
bigBedFileClose(&bbi);
freez(&bigBedPath);
}