Ejemplo n.º 1
0
struct slName *randomBigBedIds(char *table, struct sqlConnection *conn, int count)
/* Return some arbitrary IDs from a bigBed file. */
{
/* Figure out bigBed file name and open it.  Get contents for first chromosome as an example. */
struct slName *idList = NULL;
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct lm *lm = lmInit(0);
int orderedCount = count * 4;
if (orderedCount < 100)
    orderedCount = 100;
struct bigBedInterval *iv, *ivList = getNElements(bbi, chromList, lm, orderedCount);
shuffleList(&ivList);
// Make a list of item names from intervals.
int outCount = 0;
for (iv = ivList;  iv != NULL && outCount < count;  iv = iv->next)
    {
    char *row[bbi->fieldCount];
    char startBuf[16], endBuf[16];
    bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
    if (idList == NULL || differentString(row[3], idList->name))
	{
	slAddHead(&idList, slNameNew(row[3]));
	outCount++;
	}
    }
lmCleanup(&lm);
bbiFileClose(&bbi);
freeMem(fileName);
return idList;
}
Ejemplo n.º 2
0
void bigPslToPsl(char *bigPslName, char *outputName)
/* bigPslToPsl - convert bigPsl file to psle. */
{
struct bbiFile *bbi = bigBedFileOpen(bigPslName);
struct lm *lm = lmInit(0);
FILE *f = mustOpen(outputName, "w");
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    int start = 0, end = chrom->size;
    int itemsLeft = 0;
    char *chromName = chrom->name;

    struct bigBedInterval  *bbList = bigBedIntervalQuery(bbi, chromName,
            start, end, itemsLeft, lm);
    
    for(; bbList; bbList = bbList->next)
        {
        struct psl *psl, *pslList = pslFromBigPsl(chromName, bbList, NULL, NULL);

        for(psl=pslList; psl; psl = psl->next)
            {
            if (collapseStrand && (psl->strand[1] == '+'))
                psl->strand[1] = 0;
            pslTabOut(psl, f);
            }
        }
    }
}
Ejemplo n.º 3
0
struct bbiChromInfo *getAllChroms(struct bbiFile *fileList)
/* Read chromosomes from all files and make sure they agree, and return merged list. */
{
struct bbiFile *file;
struct hash *hash = hashNew(0);
struct bbiChromInfo *nameList = NULL;
for (file = fileList; file != NULL; file = file->next)
    {
    struct bbiChromInfo *info, *next, *infoList = bbiChromList(file);
    for (info = infoList; info != NULL; info = next)
        {
	next = info->next;

	struct bbiChromInfo *oldInfo = hashFindVal(hash, info->name);
	if (oldInfo != NULL)
	    {
	    if (info->size != oldInfo->size)
		errAbort("ERROR: Merging from different assemblies? "
		         "Chromosome %s is %d in %s but %d in another file",
			 info->name, (int)(info->size), file->fileName, (int)oldInfo->size);
	    }
	else
	    {
	    hashAdd(hash, info->name, info);
	    slAddHead(&nameList, info);
	    }
	}
    }
slSort(&nameList, bbiChromInfoCmpStringsWithEmbeddedNumbers);
return nameList;
}
Ejemplo n.º 4
0
void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Do correlation analysis between elder and younger and save result to
 * a new edwQaPairCorrelation record. Do this for a format where we have a bigWig file. */
{
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    return;
char *enrichedIn = elderVf->enrichedIn;
if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown"))
    {
    struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn);

    /* Get open big wig files for both younger and older. */
    char *elderPath = edwPathForFileId(conn, elderEf->id);
    char *youngerPath = edwPathForFileId(conn, youngerEf->id);
    struct bbiFile *elderBbi = bigWigFileOpen(elderPath);
    struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath);

    /* Figure out thresholds */
    double elderThreshold = twoStdsOverMean(elderBbi);
    double youngerThreshold = twoStdsOverMean(youngerBbi);

    /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/
    struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew();
    struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi);
    struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew();
    struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew();
    for (chrom = chromList; chrom != NULL; chrom = chrom->next)
        {
	addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi, 
	    elderThreshold, youngerThreshold, c, cInEnriched, cClipped);
	}

    /* Make up correlation structure . */
    struct edwQaPairCorrelation *cor;
    AllocVar(cor);
    cor->elderFileId = elderVf->fileId;
    cor->youngerFileId = youngerVf->fileId;
    cor->pearsonOverall = correlateResult(c);
    cor->pearsonInEnriched = correlateResult(cInEnriched);
    cor->pearsonClipped = correlateResult(cClipped);
    edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128);


    bigWigValsOnChromFree(&bVals);
    bigWigValsOnChromFree(&aVals);
    genomeRangeTreeFree(&targetGrt);
    freez(&cor);
    correlateFree(&c);
    bigWigFileClose(&youngerBbi);
    bigWigFileClose(&elderBbi);
    freez(&youngerPath);
    freez(&elderPath);
    }
}
Ejemplo n.º 5
0
static void downloadFullGenome(BigFileReaderData * data) {
	struct bbiChromInfo *chromList = bbiChromList(data->bwf);
	struct bbiChromInfo *chrom;

	for (chrom = chromList; chrom; chrom = chrom->next) 
		if (downloadBigRegion(data, chrom->name, 0, chrom->size))
			break;

	// TODO free chromList memory... yes but labels lost! Need to be copied out first....
	//bbiChromInfoFreeList(&(data->chromList));
}
Ejemplo n.º 6
0
static struct hash* bbiChromSizes(struct bbiFile* bbi)
/* return the hash of chrom sizes from the bigBed/bigWig */
{
    struct bbiChromInfo* cList = bbiChromList(bbi);
    struct bbiChromInfo* c;
    struct hash* cHash = newHash(10);
    for (c = cList; c != NULL; c = c->next)
        hashAddInt(cHash, c->name, (int)c->size);
    bbiChromInfoFreeList(&cList);
    return cHash;
}
Ejemplo n.º 7
0
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa)
/* Create an annoStreamer (subclass) object from a file or URL. */
{
struct bbiFile *bbi = bigWigFileOpen(fileOrUrl);
struct asObject *asObj = asParseText(annoRowBigWigAsText);
struct annoStreamBigWig *self = NULL;
AllocVar(self);
struct annoStreamer *streamer = &(self->streamer);
annoStreamerInit(streamer, aa, asObj, fileOrUrl);
streamer->rowType = arWig;
streamer->setRegion = asbwSetRegion;
streamer->nextRow = asbwNextRow;
streamer->close = asbwClose;
self->chromList = bbiChromList(bbi);
self->bbi = bbi;
return (struct annoStreamer *)self;
}
Ejemplo n.º 8
0
void bigWigCorrelate(char *aFileName, char *bFileName)
/* bigWigCorrelate - Correlate bigWig files, optionally only on target regions.. */
{
struct genomeRangeTree *targetGrt = NULL;
if (restrictFile)
    targetGrt = grtFromBigBed(restrictFile);
struct bbiFile *aBbi = bigWigFileOpen(aFileName);
struct bbiFile *bBbi = bigWigFileOpen(bFileName);
struct correlate *c = correlateNew();
struct bbiChromInfo *chrom, *chromList = bbiChromList(aBbi);
struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew();
struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew();
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    addBwCorrelations(chrom, targetGrt, aVals, bVals, aBbi, bBbi, threshold, threshold, c);
    }
printf("%g\n", correlateResult(c));
}
Ejemplo n.º 9
0
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa)
/* Create an annoStreamer (subclass) object from a file or URL. */
{
struct bbiFile *bbi = bigWigFileOpen(fileOrUrl);
struct asObject *asObj = annoStreamBigWigAsObject();
struct annoStreamBigWig *self = NULL;
AllocVar(self);
struct annoStreamer *streamer = &(self->streamer);
annoStreamerInit(streamer, aa, asObj, fileOrUrl);
//#*** Would be more memory-efficient to do arWigSingle for bedGraphs.
//#*** annoGrateWig would need to be updated to handle incoming arWigSingle.
streamer->rowType = arWigVec;
streamer->setRegion = asbwSetRegion;
streamer->nextRow = asbwNextRow;
streamer->close = asbwClose;
self->chromList = bbiChromList(bbi);
self->bbi = bbi;
return (struct annoStreamer *)self;
}
Ejemplo n.º 10
0
void bigBedToBed(char *inFile, char *outFile)
/* bigBedToBed - Convert from bigBed to ascii bed format.. */
{
struct bbiFile *bbi = bigBedFileOpen(inFile);
FILE *f = mustOpen(outFile, "w");
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
int itemCount = 0;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    if (clChrom != NULL && !sameString(clChrom, chrom->name))
        continue;
    char *chromName = chrom->name;
    int start = 0, end = chrom->size;
    if (clStart > 0)
        start = clStart;
    if (clEnd > 0)
        end = clEnd;
    int itemsLeft = 0;	// Zero actually means no limit.... 
    if (maxItems != 0)
        {
	itemsLeft = maxItems - itemCount;
	if (itemsLeft <= 0)
	    break;
	}
    struct lm *lm = lmInit(0);
    struct bigBedInterval *interval, *intervalList = bigBedIntervalQuery(bbi, chromName, 
    	start, end, itemsLeft, lm);
    for (interval = intervalList; interval != NULL; interval = interval->next)
	{
	fprintf(f, "%s\t%u\t%u", chromName, interval->start, interval->end);
	char *rest = interval->rest;
	if (rest != NULL)
	    fprintf(f, "\t%s\n", rest);
	else
	    fprintf(f, "\n");
	}
    lmCleanup(&lm);
    }
bbiChromInfoFreeList(&chromList);
carefulClose(&f);
bbiFileClose(&bbi);
}
Ejemplo n.º 11
0
struct genomeRangeTree *edwGrtFromBigBed(char *fileName)
/* Return genome range tree for simple (unblocked) bed */
{
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
struct genomeRangeTree *grt = genomeRangeTreeNew();
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    struct rbTree *tree = genomeRangeTreeFindOrAddRangeTree(grt, chrom->name);
    struct lm *lm = lmInit(0);
    struct bigBedInterval *iv, *ivList = NULL;
    ivList = bigBedIntervalQuery(bbi, chrom->name, 0, chrom->size, 0, lm);
    for (iv = ivList; iv != NULL; iv = iv->next)
        rangeTreeAdd(tree, iv->start, iv->end);
    lmCleanup(&lm);
    }
bigBedFileClose(&bbi);
bbiChromInfoFreeList(&chromList);
return grt;
}
Ejemplo n.º 12
0
void bigWigInfo(char *fileName)
/* bigWigInfo - Print out information about bigWig file.. */
{
struct bbiFile *bwf = bigWigFileOpen(fileName);

if (optionExists("minMax"))
    {
    struct bbiSummaryElement sum = bbiTotalSummary(bwf);
    printf("%f %f\n", sum.minVal, sum.maxVal);
    return;
    }

printf("version: %d\n", bwf->version);
printf("isCompressed: %s\n", (bwf->uncompressBufSize > 0 ? "yes" : "no"));
printf("isSwapped: %d\n", bwf->isSwapped);
printLabelAndLongNumber("primaryDataSize", bwf->unzoomedIndexOffset - bwf->unzoomedDataOffset);
if (bwf->levelList != NULL)
    {
    long long indexEnd = bwf->levelList->dataOffset;
    printLabelAndLongNumber("primaryIndexSize", indexEnd - bwf->unzoomedIndexOffset);
    }
printf("zoomLevels: %d\n", bwf->zoomLevels);
if (optionExists("zooms"))
    {
    struct bbiZoomLevel *zoom;
    for (zoom = bwf->levelList; zoom != NULL; zoom = zoom->next)
	printf("\t%d\t%d\n", zoom->reductionLevel, (int)(zoom->indexOffset - zoom->dataOffset));
    }
struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf);
printf("chromCount: %d\n", slCount(chromList));
if (optionExists("chroms"))
    for (chrom=chromList; chrom != NULL; chrom = chrom->next)
	printf("\t%s %d %d\n", chrom->name, chrom->id, chrom->size);
struct bbiSummaryElement sum = bbiTotalSummary(bwf);
printLabelAndLongNumber("basesCovered", sum.validCount);
printf("mean: %f\n", sum.sumData/sum.validCount);
printf("min: %f\n", sum.minVal);
printf("max: %f\n", sum.maxVal);
printf("std: %f\n", calcStdFromSums(sum.sumData, sum.sumSquares, sum.validCount));
}
Ejemplo n.º 13
0
/* --- .Call ENTRY POINT --- */
SEXP BWGFile_seqlengths(SEXP r_filename) {
  pushRHandlers();
  struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  struct bbiChromInfo *chromList = bbiChromList(file);
  struct bbiChromInfo *chrom = chromList;
  SEXP seqlengths, seqlengthNames;
  
  PROTECT(seqlengths = allocVector(INTSXP, slCount(chromList)));
  seqlengthNames = allocVector(STRSXP, length(seqlengths));
  setAttrib(seqlengths, R_NamesSymbol, seqlengthNames);
  
  for(int i = 0; i < length(seqlengths); i++) {
    INTEGER(seqlengths)[i] = chrom->size;
    SET_STRING_ELT(seqlengthNames, i, mkChar(chrom->name));
    chrom = chrom->next;
  }
  
  bbiChromInfoFreeList(&chromList);
  popRHandlers();
  UNPROTECT(1);
  return seqlengths;
}
void bigWigToWig(char *inFile, char *outFile)
/* bigWigToWig - Convert bigWig to wig.  This will keep more of the same structure of the 
 * original wig than bigWigToBedGraph does, but still will break up large stepped sections into 
 * smaller ones. */
{
struct bbiFile *bwf = bigWigFileOpen(inFile);
FILE *f = mustOpen(outFile, "w");
struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf);
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    if (clChrom != NULL && !sameString(clChrom, chrom->name))
        continue;
    char *chromName = chrom->name;
    int start = 0, end = chrom->size;
    if (clStart > 0)
        start = clStart;
    if (clEnd > 0)
        end = clEnd;
    bigWigIntervalDump(bwf, chromName, start, end, 0, f);
    }
bbiChromInfoFreeList(&chromList);
carefulClose(&f);
bbiFileClose(&bwf);
}
Ejemplo n.º 15
0
void showSchemaBigBed(char *table, struct trackDb *tdb)
/* Show schema on bigBed. */
{
/* Figure out bigBed file name and open it.  Get contents for first chromosome as an example. */
struct sqlConnection *conn = NULL;
if (!trackHubDatabase(database))
    conn = hAllocConn(database);
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct lm *lm = lmInit(0);
struct bigBedInterval *ivList = getNElements(bbi, chromList, lm, 10);

/* Get description of columns, making it up from BED records if need be. */
struct asObject *as = bigBedAsOrDefault(bbi);

hPrintf("<B>Database:</B> %s", database);
hPrintf("&nbsp;&nbsp;&nbsp;&nbsp;<B>Primary Table:</B> %s<br>", table);
hPrintf("<B>Big Bed File:</B> %s", fileName);
if (bbi->version >= 2)
    {
    hPrintf("<BR><B>Item Count:</B> ");
    printLongWithCommas(stdout, bigBedItemCount(bbi));
    }
hPrintf("<BR>\n");
hPrintf("<B>Format description:</B> %s<BR>", as->comment);

/* Put up table that describes fields. */
hTableStart();
hPrintf("<TR><TH>field</TH>");
if (ivList != NULL)
    hPrintf("<TH>example</TH>");
hPrintf("<TH>description</TH> ");
puts("</TR>\n");
struct asColumn *col;
int colCount = 0;
char *row[bbi->fieldCount];
char startBuf[16], endBuf[16];
if (ivList != NULL)
    {
    char *dupeRest = lmCloneString(lm, ivList->rest);	/* Manage rest-stomping side-effect */
    bigBedIntervalToRow(ivList, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
    ivList->rest = dupeRest;
    }
for (col = as->columnList; col != NULL; col = col->next)
    {
    hPrintf("<TR><TD><TT>%s</TT></TD>", col->name);
    if (ivList != NULL)
	hPrintf("<TD>%s</TD>", row[colCount]);
    hPrintf("<TD>%s</TD></TR>", col->comment);
    ++colCount;
    }

/* If more fields than descriptions put up minimally helpful info (at least has example). */
for ( ; colCount < bbi->fieldCount; ++colCount)
    {
    hPrintf("<TR><TD><TT>column%d</TT></TD>", colCount+1);
    if (ivList != NULL)
	hPrintf("<TD>%s</TD>", row[colCount]);
    hPrintf("<TD>n/a</TD></TR>\n");
    }
hTableEnd();


if (ivList != NULL)
    {
    /* Put up another section with sample rows. */
    webNewSection("Sample Rows");
    hTableStart();

    /* Print field names as column headers for example */
    hPrintf("<TR>");
    int colIx = 0;
    for (col = as->columnList; col != NULL; col = col->next)
	{
	hPrintf("<TH>%s</TH>", col->name);
	++colIx;
	}
    for (; colIx < colCount; ++colIx)
	hPrintf("<TH>column%d</TH>", colIx+1);
    hPrintf("</TR>\n");

    /* Print sample lines. */
    struct bigBedInterval *iv;
    for (iv=ivList; iv != NULL; iv = iv->next)
	{
	bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
	hPrintf("<TR>");
	for (colIx=0; colIx<colCount; ++colIx)
	    {
	    writeHtmlCell(row[colIx]);
	    }
	hPrintf("</TR>\n");
	}
    hTableEnd();
    }
printTrackHtml(tdb);
/* Clean up and go home. */
lmCleanup(&lm);
bbiFileClose(&bbi);
freeMem(fileName);
hFreeConn(&conn);
}
Ejemplo n.º 16
0
void doEnrichmentsFromBigBed(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigBed file. */
{
/* Get path to bigBed, open it, and read all chromosomes. */
char *bigBedPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigBedFileOpen(bigBedPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigBed data in memory. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    /* Get list of intervals in bigBed for this chromosome, and feed it to a rangeTree. */
    struct lm *lm = lmInit(0);
    struct bigBedInterval *ivList = bigBedIntervalQuery(bbi, chrom->name, 0, chrom->size, 0, lm);
    struct bigBedInterval *iv;
    struct rbTree *bbTree = rangeTreeNew();
    for (iv = ivList; iv != NULL; iv = iv->next)
	 rangeTreeAdd(bbTree, iv->start, iv->end);
    struct range *bbRange, *bbRangeList = rangeTreeList(bbTree);

    /* Loop through all targets adding overlaps from ivList and unique overlaps from bbRangeList */
    struct target *target;
    for (target = targetList; target != NULL; target = target->next)
        {
	if (target->skip)
	    continue;
	struct genomeRangeTree *grt = target->grt;
	struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	if (targetTree != NULL)
	    {
	    struct bigBedInterval *iv;
	    for (iv = ivList; iv != NULL; iv = iv->next)
		{
		int overlap = rangeTreeOverlapSize(targetTree, iv->start, iv->end);
		target->overlapBases += overlap;
		}
	    for (bbRange = bbRangeList; bbRange != NULL; bbRange = bbRange->next)
		{
		int overlap = rangeTreeOverlapSize(targetTree, bbRange->start, bbRange->end);
		target->uniqOverlapBases += overlap;
		}
	    }
	}
    rangeTreeFree(&bbTree);
    lmCleanup(&lm);
    }

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

bbiChromInfoFreeList(&chromList);
bigBedFileClose(&bbi);
freez(&bigBedPath);
}
Ejemplo n.º 17
0
/* This old way is ~3 times as slow */
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigBed file. */
{
/* Get path to bigBed, open it, and read all chromosomes. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigWig data in memory. Also just for performance we do a lookup of target range tree to
 * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    /* Get list of intervals in bigWig for this chromosome, and feed it to a rangeTree. */
    struct lm *lm = lmInit(0);
    long startBigQueryTime = clock1000();
    struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom->name, 0, chrom->size, lm);
    long endBigQueryTime = clock1000();
    totalBigQueryTime += endBigQueryTime - startBigQueryTime;
    struct bbiInterval *iv;

    /* Loop through all targets adding overlaps from ivList */
    long startOverlapTime = clock1000();
    struct target *target;
    for (target = targetList; target != NULL; target = target->next)
        {
	struct genomeRangeTree *grt = target->grt;
	struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	if (targetTree != NULL)
	    {
	    for (iv = ivList; iv != NULL; iv = iv->next)
		{
		int overlap = rangeTreeOverlapSize(targetTree, iv->start, iv->end);
		target->uniqOverlapBases += overlap;
		target->overlapBases += overlap * iv->val;
		}
	    }
	}
    long endOverlapTime = clock1000();
    totalOverlapTime += endOverlapTime - startOverlapTime;
    lmCleanup(&lm);
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}
Ejemplo n.º 18
0
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigBed file. */
{
/* Get path to bigBed, open it, and read all chromosomes. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
struct bigWigValsOnChrom *valsOnChrom = bigWigValsOnChromNew();

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigWig data in memory. Also just for performance we do a lookup of target range tree to
 * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    long startBigQueryTime = clock1000();
    boolean gotData = bigWigValsOnChromFetchData(valsOnChrom, chrom->name, bbi);
    long endBigQueryTime = clock1000();
    totalBigQueryTime += endBigQueryTime - startBigQueryTime;
    if (gotData)
	{
	double *valBuf = valsOnChrom->valBuf;
	Bits *covBuf = valsOnChrom->covBuf;

	/* Loop through all targets adding overlaps from ivList */
	long startOverlapTime = clock1000();
	struct target *target;
	for (target = targetList; target != NULL; target = target->next)
	    {
	    if (target->skip)
		continue;
	    struct genomeRangeTree *grt = target->grt;
	    struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	    if (targetTree != NULL)
		{
		struct range *range, *rangeList = rangeTreeList(targetTree);
		for (range = rangeList; range != NULL; range = range->next)
		    {
		    int s = range->start, e = range->end, i;
		    for (i=s; i<=e; ++i)
		        {
			if (bitReadOne(covBuf, i))
			    {
			    double x = valBuf[i];
			    target->uniqOverlapBases += 1;
			    target->overlapBases += x;
			    }
			}
		    }
		}
	    }
	long endOverlapTime = clock1000();
	totalOverlapTime += endOverlapTime - startOverlapTime;
	}
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

bigWigValsOnChromFree(&valsOnChrom);
bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}
Ejemplo n.º 19
0
void doBigBedReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Do correlation analysis between elder and younger and save result to
 * a new edwQaPairCorrelation record. Do this for a format where we have a bigBed file. */
{
/* If got both pairs, work is done already */
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap") 
    && pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    return;

int numColIx = 0;
if (sameString(format, "narrowPeak") || sameString(format, "broadPeak"))
    numColIx = 6;	// signalVal
else
    numColIx = 4;	// score
numColIx -= 3;		// Subtract off chrom/start/end
char *enrichedIn = elderVf->enrichedIn;
struct genomeRangeTree *targetGrt = NULL;
if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown"))
    targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn);

/* Get open big bed files for both younger and older. */
char *elderPath = edwPathForFileId(conn, elderEf->id);
char *youngerPath = edwPathForFileId(conn, youngerEf->id);
struct bbiFile *elderBbi = bigBedFileOpen(elderPath);
struct bbiFile *youngerBbi = bigBedFileOpen(youngerPath);

/* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/
struct correlate *c = correlateNew(), *cInEnriched = correlateNew();
struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi);
long long elderTotalSpan = 0, youngerTotalSpan = 0, overlapTotalSpan = 0;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    addBbCorrelations(chrom, targetGrt, elderBbi, youngerBbi, numColIx, c, cInEnriched,
	&elderTotalSpan, &youngerTotalSpan, &overlapTotalSpan);
    }

/* Make up correlation structure and save. */
if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    {
    struct edwQaPairCorrelation *cor;
    AllocVar(cor);
    cor->elderFileId = elderVf->fileId;
    cor->youngerFileId = youngerVf->fileId;
    cor->pearsonOverall = correlateResult(c);
    cor->pearsonInEnriched = correlateResult(cInEnriched);
    edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128);
    freez(&cor);
    }

/* Also make up sample structure and save.  */
if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap"))
    {
    struct edwQaPairSampleOverlap *sam;
    AllocVar(sam);
    sam->elderFileId = elderVf->fileId;
    sam->youngerFileId = youngerVf->fileId;
    sam->elderSampleBases = elderTotalSpan;
    sam->youngerSampleBases = youngerTotalSpan;
    sam->sampleOverlapBases = overlapTotalSpan;
    setSampleSampleEnrichment(sam, format, assembly, elderVf, youngerVf);
    edwQaPairSampleOverlapSaveToDb(conn, sam, "edwQaPairSampleOverlap", 128);
    freez(&sam);
    }

genomeRangeTreeFree(&targetGrt);
correlateFree(&c);
bigBedFileClose(&youngerBbi);
bigBedFileClose(&elderBbi);
freez(&youngerPath);
freez(&elderPath);
}