void addBigWigIntervalInfo(struct bbiFile *bbi, struct lm *lm, char *chrom, int start, int end,
	int *pSumSize, int *pSumCoverage, double *pSumVal)
/* Query the bigWig for chrom:start-end and fold the result into running totals:
 *    *pSumSize     - grows by the width of the queried window (end - start)
 *    *pSumCoverage - grows by each returned interval's overlap with the window
 *    *pSumVal      - grows by each interval's value weighted by that overlap
 * The lm pool is handed to bigWigIntervalQuery for the interval list. */
{
struct bbiInterval *hitList = bigWigIntervalQuery(bbi, chrom, start, end, lm);
struct bbiInterval *hit = hitList;

*pSumSize += (end - start);
while (hit != NULL)
    {
    int overlap = rangeIntersection(hit->start, hit->end, start, end);
    /* Only intervals that really intersect the window contribute. */
    if (overlap > 0)
	{
	*pSumCoverage += overlap;
	*pSumVal += overlap * hit->val;
	}
    hit = hit->next;
    }
}
boolean bigWigValsOnChromFetchData(struct bigWigValsOnChrom *chromVals, char *chrom, struct bbiFile *bigWig) /* Fetch data for chromosome from bigWig. Returns FALSE if not data on that chrom. */ { /* Fetch chromosome and size into self. */ freeMem(chromVals->chrom); chromVals->chrom = cloneString(chrom); long chromSize = chromVals->chromSize = bbiChromSize(bigWig, chrom); if (chromSize <= 0) return FALSE; /* Make sure buffers are big enough. */ if (chromSize > chromVals->bufSize) { freeMem(chromVals->valBuf); freeMem(chromVals->covBuf); chromVals->valBuf = needHugeMem((sizeof(double))*chromSize); chromVals->covBuf = bitAlloc(chromSize); chromVals->bufSize = chromSize; } /* Zero out buffers */ bitClear(chromVals->covBuf, chromSize); double *valBuf = chromVals->valBuf; int i; for (i=0; i<chromSize; ++i) valBuf[i] = 0.0; fetchIntoBuf(bigWig, chrom, 0, chromSize, chromVals); #ifdef OLD /* Fetch intervals for this chromosome and fold into buffers. */ struct lm *lm = lmInit(0); struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bigWig, chrom, 0, chromSize, lm); for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; int end = iv->end; for (i=iv->start; i<end; ++i) valBuf[i] = val; bitSetRange(chromVals->covBuf, iv->start, iv->end - iv->start); } lmCleanup(&lm); #endif /* OLD */ return TRUE; }
static void bigWigClick(struct trackDb *tdb, char *fileName) /* Display details for BigWig data tracks. */ { char *chrom = cartString(cart, "c"); /* Open BigWig file and get interval list. */ struct bbiFile *bbi = NULL; struct lm *lm = lmInit(0); struct bbiInterval *bbList = NULL; char *maxWinToQuery = trackDbSettingClosestToHome(tdb, "maxWindowToQuery"); unsigned maxWTQ = 0; if (isNotEmpty(maxWinToQuery)) maxWTQ = sqlUnsigned(maxWinToQuery); if ((maxWinToQuery == NULL) || (maxWTQ > winEnd-winStart)) { bbi = bigWigFileOpen(fileName); bbList = bigWigIntervalQuery(bbi, chrom, winStart, winEnd, lm); } char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */ sprintLongWithCommas(num1Buf, BASE_1(winStart)); sprintLongWithCommas(num2Buf, winEnd); printf("<B>Position: </B> %s:%s-%s<BR>\n", chrom, num1Buf, num2Buf ); sprintLongWithCommas(num1Buf, winEnd-winStart); printf("<B>Total Bases in view: </B> %s <BR>\n", num1Buf); if (bbList != NULL) { bbiIntervalStatsReport(bbList, tdb->table, chrom, winStart, winEnd); } else if ((bbi == NULL) && (maxWTQ <= winEnd-winStart)) { sprintLongWithCommas(num1Buf, maxWTQ); printf("<P>Zoom in to a view less than %s bases to see data summary.</P>",num1Buf); } else { printf("<P>No data overlapping current position.</P>"); } lmCleanup(&lm); bbiFileClose(&bbi); }
struct bed *bigWigIntervalsToBed(struct sqlConnection *conn, char *table, struct region *region, struct lm *lm) /* Return a list of unfiltered, unintersected intervals in region as bed (for * secondary table in intersection). */ { struct bed *bed, *bedList = NULL; char *fileName = bigWigFileName(table, conn); struct bbiFile *bwf = bigWigFileOpen(fileName); struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, region->chrom, region->start, region->end, lm); for (iv = ivList; iv != NULL; iv = iv->next) { lmAllocVar(lm, bed); bed->chrom = region->chrom; bed->chromStart = iv->start; bed->chromEnd = iv->end; slAddHead(&bedList, bed); } slReverse(&bedList); return bedList; }
/* --- .Call ENTRY POINT --- */
/*
 * Query a bigWig file for every range in r_ranges.
 *
 *   r_filename      file to open (converted with asChar)
 *   r_ranges        list with one IRanges element per chromosome; its names
 *                   attribute supplies the chromosome names
 *   r_return_score  logical: include a "score" column in the RangedData result
 *   r_return_list   logical: selects the shape of the result (see below)
 *
 * Ranges are taken as start/width pairs and queried as the half-open interval
 * [start - 1, start - 1 + width).
 *
 * When r_return_list is true the result is a "SimpleList" holding one numeric
 * vector per query range, of length width.  It is zero-filled, and each
 * position covered by a returned interval is set to that interval's value.
 *
 * Otherwise the result is a "RangedData" with, per chromosome, an IRanges of
 * the returned intervals (start = interval start + 1, width = end - start) and
 * a DataFrame that carries the interval values as "score" when r_return_score
 * is true.
 */
SEXP BWGFile_query(SEXP r_filename, SEXP r_ranges, SEXP r_return_score,
                   SEXP r_return_list)
{
  pushRHandlers();
  struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  SEXP chromNames = getAttrib(r_ranges, R_NamesSymbol);
  int nchroms = length(r_ranges);
  Rboolean return_list = asLogical(r_return_list);
  /* Only one of the two result shapes is built; the containers of the other
   * stay at R_NilValue rather than being left uninitialised. */
  SEXP rangesList, dataFrameList, ans;
  SEXP rangesListEls = R_NilValue, dataFrameListEls = R_NilValue;
  SEXP numericListEls = R_NilValue;
  bool returnScore = asLogical(r_return_score);
  const char *var_names[] = { "score", "" };
  struct lm *lm = lmInit(0);
  struct bbiInterval *hits = NULL;

  if (return_list) {
    /* One output vector per query range, across all chromosomes. */
    int n_ranges = 0;
    for (int i = 0; i < nchroms; i++) {
      SEXP localRanges = VECTOR_ELT(r_ranges, i);
      n_ranges += get_IRanges_length(localRanges);
    }
    PROTECT(numericListEls = allocVector(VECSXP, n_ranges));
  } else {
    PROTECT(rangesListEls = allocVector(VECSXP, nchroms));
    setAttrib(rangesListEls, R_NamesSymbol, chromNames);
    PROTECT(dataFrameListEls = allocVector(VECSXP, nchroms));
    setAttrib(dataFrameListEls, R_NamesSymbol, chromNames);
  }

  int elt_len = 0; /* next free slot in numericListEls */
  for (int i = 0; i < nchroms; i++) {
    SEXP localRanges = VECTOR_ELT(r_ranges, i);
    int nranges = get_IRanges_length(localRanges);
    int *start = INTEGER(get_IRanges_start(localRanges));
    int *width = INTEGER(get_IRanges_width(localRanges));

    for (int j = 0; j < nranges; j++) {
      struct bbiInterval *queryHits =
        bigWigIntervalQuery(file, (char *)CHAR(STRING_ELT(chromNames, i)),
                            start[j] - 1, start[j] - 1 + width[j], lm);
      if (return_list) {
        /* IntegerList */
        SEXP ans_numeric;
        PROTECT(ans_numeric = allocVector(REALSXP, width[j]));
        double *vals = REAL(ans_numeric);
        memset(vals, 0, sizeof(double) * width[j]);
        for (struct bbiInterval *qhits = queryHits; qhits != NULL;
             qhits = qhits->next) {
          /* Offset of position l within the query is l - (start[j] - 1).
           * NOTE(review): this relies on the query returning intervals
           * clipped to the requested window - confirm. */
          for (int l = qhits->start; l < qhits->end; l++)
            vals[(l - start[j] + 1)] = qhits->val;
        }
        SET_VECTOR_ELT(numericListEls, elt_len, ans_numeric);
        elt_len++;
        UNPROTECT(1);
      } else {
        /* Accumulate this chromosome's hits in reverse; the whole list is
         * flipped back once all ranges are done.  The list is only consumed
         * by the RangedData branch, so list mode skips this work. */
        slReverse(&queryHits);
        hits = slCat(queryHits, hits);
      }
    }

    /* RangedData */
    if (!return_list) {
      int nhits = slCount(hits);
      slReverse(&hits);
      SEXP ans_start, ans_width, ans_score = R_NilValue, ans_score_l, ans_nrows;
      PROTECT(ans_start = allocVector(INTSXP, nhits));
      PROTECT(ans_width = allocVector(INTSXP, nhits));
      if (returnScore) {
        PROTECT(ans_score_l = mkNamed(VECSXP, var_names));
        ans_score = allocVector(REALSXP, nhits);
        SET_VECTOR_ELT(ans_score_l, 0, ans_score);
      } else {
        /* Skip past "score" so only the "" terminator is seen. */
        PROTECT(ans_score_l = mkNamed(VECSXP, var_names + 1));
      }
      /* Walking the list leaves hits == NULL, ready for the next chromosome. */
      for (int j = 0; j < nhits; j++, hits = hits->next) {
        INTEGER(ans_start)[j] = hits->start + 1;
        INTEGER(ans_width)[j] = hits->end - hits->start;
        if (returnScore)
          REAL(ans_score)[j] = hits->val;
      }
      /* Protect the row count: it would otherwise be a fresh, unreachable
       * object while the constructors below are free to allocate. */
      PROTECT(ans_nrows = ScalarInteger(nhits));
      SET_VECTOR_ELT(rangesListEls, i,
                     new_IRanges("IRanges", ans_start, ans_width, R_NilValue));
      SET_VECTOR_ELT(dataFrameListEls, i,
                     new_DataFrame("DataFrame", ans_score_l, R_NilValue,
                                   ans_nrows));
      UNPROTECT(4);
    }
  }

  bbiFileClose(&file);

  if (return_list) {
    ans = new_SimpleList("SimpleList", numericListEls);
    UNPROTECT(1);
  } else {
    PROTECT(dataFrameList =
            new_SimpleList("SimpleSplitDataFrameList", dataFrameListEls));
    PROTECT(rangesList = new_SimpleList("SimpleRangesList", rangesListEls));
    ans = new_RangedData("RangedData", rangesList, dataFrameList);
    UNPROTECT(4);
  }

  lmCleanup(&lm);
  popRHandlers();
  return ans;
}
/* This old way is ~3 times as slow */ void doEnrichmentsFromBigWig(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf, struct cdwAssembly *assembly, struct target *targetList) /* Figure out enrichments from a bigBed file. */ { /* Get path to bigBed, open it, and read all chromosomes. */ char *bigWigPath = cdwPathForFileId(conn, ef->id); struct bbiFile *bbi = bigWigFileOpen(bigWigPath); struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi); /* This takes a while, so let's figure out what parts take the time. */ long totalBigQueryTime = 0; long totalOverlapTime = 0; /* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases * for all targets. This is complicated by just wanting to keep one chromosome worth of * bigWig data in memory. Also just for performance we do a lookup of target range tree to * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */ for (chrom = chromList; chrom != NULL; chrom = chrom->next) { /* Get list of intervals in bigWig for this chromosome, and feed it to a rangeTree. 
*/ struct lm *lm = lmInit(0); long startBigQueryTime = clock1000(); struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom->name, 0, chrom->size, lm); long endBigQueryTime = clock1000(); totalBigQueryTime += endBigQueryTime - startBigQueryTime; struct bbiInterval *iv; /* Loop through all targets adding overlaps from ivList */ long startOverlapTime = clock1000(); struct target *target; for (target = targetList; target != NULL; target = target->next) { struct genomeRangeTree *grt = target->grt; struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name); if (targetTree != NULL) { for (iv = ivList; iv != NULL; iv = iv->next) { int overlap = rangeTreeOverlapSize(targetTree, iv->start, iv->end); target->uniqOverlapBases += overlap; target->overlapBases += overlap * iv->val; } } } long endOverlapTime = clock1000(); totalOverlapTime += endOverlapTime - startOverlapTime; lmCleanup(&lm); } verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime); /* Now loop through targets and save enrichment info to database */ struct target *target; for (target = targetList; target != NULL; target = target->next) { struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, target->overlapBases, target->uniqOverlapBases); cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128); cdwQaEnrichFree(&enrich); } bbiChromInfoFreeList(&chromList); bigWigFileClose(&bbi); freez(&bigWigPath); }
struct bbiInterval *intersectedFilteredBbiIntervalsOnRegion(struct sqlConnection *conn, struct bbiFile *bwf, struct region *region, enum wigCompare filterCmp, double filterLl, double filterUl, struct lm *lm) /* Get list of bbiIntervals (more-or-less bedGraph things from bigWig) out of bigWig file * and if necessary apply filter and intersection. Return list which is allocated in lm. */ { char *chrom = region->chrom; int chromSize = hChromSize(database, chrom); struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, region->start, region->end, lm); /* Run filter if necessary */ if (filterCmp != wigNoOp_e) { struct bbiInterval *next, *newList = NULL; for (iv = ivList; iv != NULL; iv = next) { next = iv->next; if (wigCompareValFilter(iv->val, filterCmp, filterLl, filterUl)) { slAddHead(&newList, iv); } } slReverse(&newList); ivList = newList; } /* Run intersection if necessary */ if (anyIntersection()) { boolean isBpWise = intersectionIsBpWise(); Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise); struct bbiInterval *next, *newList = NULL; double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0)*0.01; double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100)*0.01; char *op = cartString(cart, hgtaIntersectOp); for (iv = ivList; iv != NULL; iv = next) { next = iv->next; int start = iv->start; int size = iv->end - start; int overlap = bitCountRange(bits2, start, size); if (isBpWise) { if (overlap == size) { slAddHead(&newList, iv); } else if (overlap > 0) { /* Here we have to break things up. 
*/ double val = iv->val; struct bbiInterval *partIv = iv; // Reuse memory for first interval int s = iv->start, end = iv->end; for (;;) { s = bitFindSet(bits2, s, end); if (s >= end) break; int bitsSet = bitFindClear(bits2, s, end) - s; if (partIv == NULL) lmAllocVar(lm, partIv); partIv->start = s; partIv->end = s + bitsSet; partIv->val = val; slAddHead(&newList, partIv); partIv = NULL; s += bitsSet; if (s >= end) break; } } } else { double coverage = (double)overlap/size; if (intersectOverlapFilter(op, moreThresh, lessThresh, coverage)) { slAddHead(&newList, iv); } } } slReverse(&newList); ivList = newList; bitFree(&bits2); } return ivList; }
void bigWigMerge(int inCount, char *inFiles[], char *outFile) /* bigWigMerge - Merge together multiple bigWigs into a single one.. */ { /* Make a list of open bigWig files. */ struct bbiFile *inFile, *inFileList = NULL; int i; for (i=0; i<inCount; ++i) { if (clInList) { addWigsInFile(inFiles[i], &inFileList); } else { inFile = bigWigFileOpen(inFiles[i]); slAddTail(&inFileList, inFile); } } FILE *f = mustOpen(outFile, "w"); struct bbiChromInfo *chrom, *chromList = getAllChroms(inFileList); verbose(1, "Got %d chromosomes from %d bigWigs\nProcessing", slCount(chromList), slCount(inFileList)); double *mergeBuf = NULL; int mergeBufSize = 0; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { struct lm *lm = lmInit(0); /* Make sure merge buffer is big enough. */ int chromSize = chrom->size; verboseDot(); verbose(2, "Processing %s (%d bases)\n", chrom->name, chromSize); if (chromSize > mergeBufSize) { mergeBufSize = chromSize; freeMem(mergeBuf); mergeBuf = needHugeMem(mergeBufSize * sizeof(double)); } int i; for (i=0; i<chromSize; ++i) mergeBuf[i] = 0.0; /* Loop through each input file grabbing data and merging it in. */ for (inFile = inFileList; inFile != NULL; inFile = inFile->next) { struct bbiInterval *ivList = bigWigIntervalQuery(inFile, chrom->name, 0, chromSize, lm); verbose(3, "Got %d intervals in %s\n", slCount(ivList), inFile->fileName); struct bbiInterval *iv; for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; if (val > clClip) val = clClip; int end = iv->end; for (i=iv->start; i < end; ++i) mergeBuf[i] += val; } } /* Output each range of same values as a bedGraph item */ int sameCount; for (i=0; i<chromSize; i += sameCount) { sameCount = doublesTheSame(mergeBuf+i, chromSize-i); double val = mergeBuf[i] + clAdjust; if (val > clThreshold) fprintf(f, "%s\t%d\t%d\t%g\n", chrom->name, i, i + sameCount, val); } lmCleanup(&lm); } verbose(1, "\n"); carefulClose(&f); }