static void bigWigLoadItems(struct track *tg) /* Fill up tg->items with bedGraphItems derived from a bigWig file */ { char *extTableString = trackDbSetting(tg->tdb, "extTable"); if (extTableString != NULL) { // if there's an extra table, read this one in too struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb); char *fileName = bbiNameFromTable(conn, tg->table); struct bbiFile *bbiFile = bigWigFileOpen(fileName); slAddHead(&tg->bbiFile, bbiFile); fileName = bbiNameFromTable(conn, extTableString); bbiFile = bigWigFileOpen(fileName); slAddHead(&tg->bbiFile, bbiFile); hFreeConn(&conn); } else { if (tg->bbiFile == NULL) { /* Figure out bigWig file name. */ struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb); char *fileName = bbiNameFromTable(conn, tg->table); tg->bbiFile = bigWigFileOpen(fileName); hFreeConn(&conn); } } }
void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly, struct edwFile *elderEf, struct edwValidFile *elderVf, struct edwFile *youngerEf, struct edwValidFile *youngerVf) /* Do correlation analysis between elder and younger and save result to * a new edwQaPairCorrelation record. Do this for a format where we have a bigWig file. */ { if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation")) return; char *enrichedIn = elderVf->enrichedIn; if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown")) { struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn); /* Get open big wig files for both younger and older. */ char *elderPath = edwPathForFileId(conn, elderEf->id); char *youngerPath = edwPathForFileId(conn, youngerEf->id); struct bbiFile *elderBbi = bigWigFileOpen(elderPath); struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath); /* Figure out thresholds */ double elderThreshold = twoStdsOverMean(elderBbi); double youngerThreshold = twoStdsOverMean(youngerBbi); /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/ struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi); struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew(); struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew(); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi, elderThreshold, youngerThreshold, c, cInEnriched, cClipped); } /* Make up correlation structure . 
*/ struct edwQaPairCorrelation *cor; AllocVar(cor); cor->elderFileId = elderVf->fileId; cor->youngerFileId = youngerVf->fileId; cor->pearsonOverall = correlateResult(c); cor->pearsonInEnriched = correlateResult(cInEnriched); cor->pearsonClipped = correlateResult(cClipped); edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128); bigWigValsOnChromFree(&bVals); bigWigValsOnChromFree(&aVals); genomeRangeTreeFree(&targetGrt); freez(&cor); correlateFree(&c); bigWigFileClose(&youngerBbi); bigWigFileClose(&elderBbi); freez(&youngerPath); freez(&elderPath); } }
void bigWigFillDataVector(char *table, struct region *region, struct sqlConnection *conn, struct dataVector *vector) /* Fill in data vector with bigWig info on region. Handles filters and intersections. */ { /* Figure out filter values if any. */ double ll, ul; enum wigCompare cmp; getWigFilter(database, curTable, &cmp, &ll, &ul); /* Get intervals that pass filter and intersection. */ struct lm *lm = lmInit(0); char *fileName = bigWigFileName(table, conn); struct bbiFile *bwf = bigWigFileOpen(fileName); struct bbiInterval *iv, *ivList; ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm); int vIndex = 0; for (iv = ivList; iv != NULL; iv = iv->next) { int start = max(iv->start, region->start); int end = min(iv->end, region->end); double val = iv->val; int i; for (i=start; i<end && vIndex < vector->maxCount; ++i) { vector->value[vIndex] = val; vector->position[vIndex] = i; ++vIndex; } } vector->count = vIndex; bbiFileClose(&bwf); freeMem(fileName); lmCleanup(&lm); }
void bigWigAverageOverBed(char *inBw, char *inBed, char *outTab) /* bigWigAverageOverBed - Compute average score of big wig over each bed, which may have introns. */ { struct bed *bedList; int fieldCount; bedLoadAllReturnFieldCount(inBed, &bedList, &fieldCount); checkUniqueNames(bedList); struct bbiFile *bbi = bigWigFileOpen(inBw); FILE *f = mustOpen(outTab, "w"); FILE *bedF = NULL; if (bedOut != NULL) bedF = mustOpen(bedOut, "w"); /* Count up number of blocks in file. It takes about 1/100th of of second to * look up a single block in a bigWig. On the other hand to stream through * the whole file setting a array of doubles takes about 30 seconds, so we change * strategy at 3,000 blocks. * I (Jim) usually avoid having two paths through the code like this, and am tempted * to always go the ~30 second chromosome-at-a-time way. On the other hand the block-way * was developed first, and it was useful to have both ways to test against each other. * (This found a bug where the chromosome way wasn't handling beds in chromosomes not * covered by the bigWig for instance). Since this code is not likely to change too * much, keeping both implementations in seems reasonable. */ int blockCount = countBlocks(bedList, fieldCount); verbose(2, "Got %d blocks, if >= 3000 will use chromosome-at-a-time method\n", blockCount); if (blockCount < 3000) averageFetchingEachBlock(bbi, bedList, fieldCount, f, bedF); else averageFetchingEachChrom(bbi, &bedList, fieldCount, f, bedF); carefulClose(&bedF); carefulClose(&f); }
/* --- .Call ENTRY POINT --- */
/* Summarize a bigWig file over a set of ranges.  For element i of r_chrom a
 * numeric vector of length r_size[i] is created, pre-filled with
 * r_default_value, and handed to bigWigSummaryArray() for the range described
 * by r_ranges[i].  Returns a list with one such vector per element of r_chrom;
 * a warning is raised for each range that could not be summarized. */
SEXP BWGFile_summary(SEXP r_filename, SEXP r_chrom, SEXP r_ranges,
                     SEXP r_size, SEXP r_type, SEXP r_default_value)
{
  pushRHandlers();
  struct bbiFile *bwf = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  enum bbiSummaryType summaryType =
    bbiSummaryTypeFromString((char *)CHAR(asChar(r_type)));
  double fill = asReal(r_default_value);
  int *starts = INTEGER(get_IRanges_start(r_ranges));
  int *widths = INTEGER(get_IRanges_width(r_ranges));

  SEXP ans;
  PROTECT(ans = allocVector(VECSXP, length(r_chrom)));
  for (int i = 0; i < length(r_chrom); i++) {
    int nBins = INTEGER(r_size)[i];
    char *chromName = (char *)CHAR(STRING_ELT(r_chrom, i));

    /* Pre-fill with the default; it is stored in ans before the summary call. */
    SEXP r_bins = allocVector(REALSXP, nBins);
    double *bins = REAL(r_bins);
    for (int j = 0; j < nBins; j++)
      bins[j] = fill;
    SET_VECTOR_ELT(ans, i, r_bins);

    int queryStart = starts[i] - 1;
    int queryEnd = queryStart + widths[i];
    bool ok = bigWigSummaryArray(bwf, chromName, queryStart, queryEnd,
                                 summaryType, nBins, bins);
    if (!ok)
      warning("Failed to summarize range %d (%s:%d-%d)", i, chromName,
              starts[i], queryEnd);
  }
  bbiFileClose(&bwf);
  popRHandlers();
  UNPROTECT(1);
  return ans;
}
void checkInputOpenFiles(struct inInfo *array, int count) /* Make sure all of the input is there and of right format before going forward. Since * this is going to take a while we want to fail fast. */ { int i; for (i=0; i<count; ++i) { struct inInfo *in = &array[i]; switch (in->type) { case itBigWig: { /* Just open and close, it will abort if any problem. */ in->bbi = bigWigFileOpen(in->fileName); break; } case itPromoterBed: case itUnstrandedBed: case itBlockedBed: { struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE); char *line; lineFileNeedNext(lf, &line, NULL); char *dupe = cloneString(line); char *row[256]; int wordCount = chopLine(dupe, row); struct bed *bed = NULL; switch (in->type) { case itPromoterBed: lineFileExpectAtLeast(lf, 6, wordCount); bed = bedLoadN(row, 6); char strand = bed->strand[0]; if (strand != '+' && strand != '-') errAbort("%s must be stranded, got %s in that field", lf->fileName, row[6]); break; case itUnstrandedBed: lineFileExpectAtLeast(lf, 4, wordCount); bed = bedLoadN(row, 4); break; case itBlockedBed: lineFileExpectAtLeast(lf, 4, wordCount); bed = bedLoadN(row, 12); break; default: internalErr(); break; } bedFree(&bed); freez(&dupe); lineFileReuse(lf); break; } default: internalErr(); break; } } }
void printBiggestGap(char *database, struct sqlConnection *conn, struct slName *chromList, struct hash *chromHash, char *track) /* Look up track in database, figure out which type it is, call * appropriate biggest gap finder, and then print result. */ { struct trackDb *tdb = hTrackInfo(conn, track); struct hTableInfo *hti = hFindTableInfo(database, chromList->name, tdb->table); char *typeWord = cloneFirstWord(tdb->type); boolean isBig = FALSE, isBigBed = FALSE; struct bbiFile *bbi = NULL; if (sameString(typeWord, "bigBed")) { isBig = TRUE; isBigBed = TRUE; bbi = bigBedFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) ); } else if (sameString(typeWord, "bigWig")) { isBig = TRUE; bbi = bigWigFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) ); } char *biggestChrom = NULL; int biggestSize = 0, biggestStart = 0, biggestEnd = 0; struct slName *chrom; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { if (!allParts && strchr(chrom->name, '_')) // Generally skip weird chroms continue; if (female && sameString(chrom->name, "chrY")) continue; int chromSize = hashIntVal(chromHash, chrom->name); struct rbTree *rt = rangeTreeNew(); int start = 0, end = 0, size = 0; if (isBig) bigCoverageIntoTree(tdb, bbi, chrom->name, chromSize, rt, isBigBed); else tableCoverageIntoTree(hti, tdb, conn, chrom->name, chromSize, rt); if (rt->n > 0) // Want to keep completely uncovered chromosome uncovered addGaps(conn, chrom->name, rt); biggestGapFromRangeTree(rt, chromSize, &start, &end, &size); if (size > biggestSize) { biggestSize = size; biggestStart = start; biggestEnd = end; biggestChrom = chrom->name; } rangeTreeFree(&rt); } printf("%s\t%s:%d-%d\t", track, biggestChrom, biggestStart+1, biggestEnd); if (noComma) printf("%d", biggestSize); else printLongWithCommas(stdout, biggestSize); putchar('\n'); freez(&typeWord); bbiFileClose(&bbi); }
void bigWigCorrelate(char *aFileName, char *bFileName) /* bigWigCorrelate - Correlate bigWig files, optionally only on target regions.. */ { struct genomeRangeTree *targetGrt = NULL; if (restrictFile) targetGrt = grtFromBigBed(restrictFile); struct bbiFile *aBbi = bigWigFileOpen(aFileName); struct bbiFile *bBbi = bigWigFileOpen(bFileName); struct correlate *c = correlateNew(); struct bbiChromInfo *chrom, *chromList = bbiChromList(aBbi); struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew(); struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew(); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { addBwCorrelations(chrom, targetGrt, aVals, bVals, aBbi, bBbi, threshold, threshold, c); } printf("%g\n", correlateResult(c)); }
boolean bigWigSummaryArrayExtended(char *fileName, char *chrom, bits32 start, bits32 end,
    int summarySize, struct bbiSummaryElement *summary)
/* Open fileName as a bigWig, get extended summary information for summarySize
 * evenly spaced elements into the summary array, and close the file again.
 * Returns whatever bbiSummaryArrayExtended() returns. */
{
    struct bbiFile *bwf = bigWigFileOpen(fileName);
    boolean gotData = bbiSummaryArrayExtended(bwf, chrom, start, end,
        bigWigIntervalQuery, summarySize, summary);
    bbiFileClose(&bwf);
    return gotData;
}
boolean bigWigSummaryArray(char *fileName, char *chrom, bits32 start, bits32 end,
    enum bbiSummaryType summaryType, int summarySize, double *summaryValues)
/* Open fileName as a bigWig, fill in summaryValues with data from the indicated
 * chromosome range, and close the file again.  Callers should initialize
 * summaryValues to a default value first: per the original notes, entries for
 * regions without data in the file are not touched (generally you want the
 * default to be 0.0 or nan("") depending on the application).
 * Returns FALSE if no data at that position. */
{
    struct bbiFile *bbi = bigWigFileOpen(fileName);
    boolean gotData = bbiSummaryArray(bbi, chrom, start, end, bigWigIntervalQuery,
        summaryType, summarySize, summaryValues);
    bbiFileClose(&bbi);
    return gotData;
}
void addWigsInFile(char *fileName, struct bbiFile **pList)
/* Treat each word that readAllWords() returns for fileName as a bigWig file
 * name, open it, and append the open file to the end of *pList. */
{
    char **words = NULL;
    char *buf = NULL;
    int count = 0;
    readAllWords(fileName, &words, &count, &buf);

    int wordIx = 0;
    while (wordIx < count) {
        struct bbiFile *opened = bigWigFileOpen(words[wordIx]);
        slAddTail(pList, opened);
        ++wordIx;
    }

    freeMem(words);
    freeMem(buf);
}
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa) /* Create an annoStreamer (subclass) object from a file or URL. */ { struct bbiFile *bbi = bigWigFileOpen(fileOrUrl); struct asObject *asObj = asParseText(annoRowBigWigAsText); struct annoStreamBigWig *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, aa, asObj, fileOrUrl); streamer->rowType = arWig; streamer->setRegion = asbwSetRegion; streamer->nextRow = asbwNextRow; streamer->close = asbwClose; self->chromList = bbiChromList(bbi); self->bbi = bbi; return (struct annoStreamer *)self; }
int bigWigOutRegion(char *table, struct sqlConnection *conn, struct region *region, int maxOut, enum wigOutputType wigOutType) /* Write out bigWig for region, doing intersecting and filtering as need be. */ { boolean isMerged = anySubtrackMerge(table, database); int resultCount = 0; char *wigFileName = bigWigFileName(table, conn); if (wigFileName) { struct bbiFile *bwf = bigWigFileOpen(wigFileName); if (bwf) { /* Easy case, just dump out data. */ if (!anyFilter() && !anyIntersection() && !isMerged && wigOutType == wigOutData) resultCount = bigWigIntervalDump(bwf, region->chrom, region->start, region->end, maxOut, stdout); /* Pretty easy case, still do it ourselves. */ else if (!isMerged && wigOutType == wigOutData) { double ll, ul; enum wigCompare cmp; getWigFilter(database, curTable, &cmp, &ll, &ul); struct lm *lm = lmInit(0); struct bbiInterval *ivList, *iv; ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm); for (iv=ivList; iv != NULL && resultCount < maxOut; iv = iv->next, ++resultCount) { fprintf(stdout, "%s\t%d\t%d\t%g\n", region->chrom, iv->start, iv->end, iv->val); } lmCleanup(&lm); } /* Harder cases - resort to making a data vector and letting that machinery handle it. */ else { struct dataVector *dv = bigWigDataVector(table, conn, region); resultCount = wigPrintDataVectorOut(dv, wigOutType, maxOut, NULL); dataVectorFree(&dv); } } bbiFileClose(&bwf); } freeMem(wigFileName); return resultCount; }
static void bigWigClick(struct trackDb *tdb, char *fileName) /* Display details for BigWig data tracks. */ { char *chrom = cartString(cart, "c"); /* Open BigWig file and get interval list. */ struct bbiFile *bbi = NULL; struct lm *lm = lmInit(0); struct bbiInterval *bbList = NULL; char *maxWinToQuery = trackDbSettingClosestToHome(tdb, "maxWindowToQuery"); unsigned maxWTQ = 0; if (isNotEmpty(maxWinToQuery)) maxWTQ = sqlUnsigned(maxWinToQuery); if ((maxWinToQuery == NULL) || (maxWTQ > winEnd-winStart)) { bbi = bigWigFileOpen(fileName); bbList = bigWigIntervalQuery(bbi, chrom, winStart, winEnd, lm); } char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */ sprintLongWithCommas(num1Buf, BASE_1(winStart)); sprintLongWithCommas(num2Buf, winEnd); printf("<B>Position: </B> %s:%s-%s<BR>\n", chrom, num1Buf, num2Buf ); sprintLongWithCommas(num1Buf, winEnd-winStart); printf("<B>Total Bases in view: </B> %s <BR>\n", num1Buf); if (bbList != NULL) { bbiIntervalStatsReport(bbList, tdb->table, chrom, winStart, winEnd); } else if ((bbi == NULL) && (maxWTQ <= winEnd-winStart)) { sprintLongWithCommas(num1Buf, maxWTQ); printf("<P>Zoom in to a view less than %s bases to see data summary.</P>",num1Buf); } else { printf("<P>No data overlapping current position.</P>"); } lmCleanup(&lm); bbiFileClose(&bbi); }
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa) /* Create an annoStreamer (subclass) object from a file or URL. */ { struct bbiFile *bbi = bigWigFileOpen(fileOrUrl); struct asObject *asObj = annoStreamBigWigAsObject(); struct annoStreamBigWig *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, aa, asObj, fileOrUrl); //#*** Would be more memory-efficient to do arWigSingle for bedGraphs. //#*** annoGrateWig would need to be updated to handle incoming arWigSingle. streamer->rowType = arWigVec; streamer->setRegion = asbwSetRegion; streamer->nextRow = asbwNextRow; streamer->close = asbwClose; self->chromList = bbiChromList(bbi); self->bbi = bbi; return (struct annoStreamer *)self; }
struct bed *bigWigIntervalsToBed(struct sqlConnection *conn, char *table, struct region *region, struct lm *lm) /* Return a list of unfiltered, unintersected intervals in region as bed (for * secondary table in intersection). */ { struct bed *bed, *bedList = NULL; char *fileName = bigWigFileName(table, conn); struct bbiFile *bwf = bigWigFileOpen(fileName); struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, region->chrom, region->start, region->end, lm); for (iv = ivList; iv != NULL; iv = iv->next) { lmAllocVar(lm, bed); bed->chrom = region->chrom; bed->chromStart = iv->start; bed->chromEnd = iv->end; slAddHead(&bedList, bed); } slReverse(&bedList); return bedList; }
void bigWigInfo(char *fileName) /* bigWigInfo - Print out information about bigWig file.. */ { struct bbiFile *bwf = bigWigFileOpen(fileName); if (optionExists("minMax")) { struct bbiSummaryElement sum = bbiTotalSummary(bwf); printf("%f %f\n", sum.minVal, sum.maxVal); return; } printf("version: %d\n", bwf->version); printf("isCompressed: %s\n", (bwf->uncompressBufSize > 0 ? "yes" : "no")); printf("isSwapped: %d\n", bwf->isSwapped); printLabelAndLongNumber("primaryDataSize", bwf->unzoomedIndexOffset - bwf->unzoomedDataOffset); if (bwf->levelList != NULL) { long long indexEnd = bwf->levelList->dataOffset; printLabelAndLongNumber("primaryIndexSize", indexEnd - bwf->unzoomedIndexOffset); } printf("zoomLevels: %d\n", bwf->zoomLevels); if (optionExists("zooms")) { struct bbiZoomLevel *zoom; for (zoom = bwf->levelList; zoom != NULL; zoom = zoom->next) printf("\t%d\t%d\n", zoom->reductionLevel, (int)(zoom->indexOffset - zoom->dataOffset)); } struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf); printf("chromCount: %d\n", slCount(chromList)); if (optionExists("chroms")) for (chrom=chromList; chrom != NULL; chrom = chrom->next) printf("\t%s %d %d\n", chrom->name, chrom->id, chrom->size); struct bbiSummaryElement sum = bbiTotalSummary(bwf); printLabelAndLongNumber("basesCovered", sum.validCount); printf("mean: %f\n", sum.sumData/sum.validCount); printf("min: %f\n", sum.minVal); printf("max: %f\n", sum.maxVal); printf("std: %f\n", calcStdFromSums(sum.sumData, sum.sumSquares, sum.validCount)); }
/* --- .Call ENTRY POINT --- */
/* Return an integer vector with one element per chromosome in the bigWig file
 * named by r_filename: the values are the chromosome sizes and the names
 * attribute holds the chromosome names. */
SEXP BWGFile_seqlengths(SEXP r_filename) {
  pushRHandlers();
  struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  struct bbiChromInfo *chromList = bbiChromList(file);
  struct bbiChromInfo *chrom = chromList;
  SEXP seqlengths, seqlengthNames;

  PROTECT(seqlengths = allocVector(INTSXP, slCount(chromList)));
  /* The names vector is attached to the protected result right away. */
  seqlengthNames = allocVector(STRSXP, length(seqlengths));
  setAttrib(seqlengths, R_NamesSymbol, seqlengthNames);

  for(int i = 0; i < length(seqlengths); i++) {
    INTEGER(seqlengths)[i] = chrom->size;
    SET_STRING_ELT(seqlengthNames, i, mkChar(chrom->name));
    chrom = chrom->next;
  }

  bbiChromInfoFreeList(&chromList);
  /* Close the file like the other .Call entry points do; it used to be left open. */
  bbiFileClose(&file);
  popRHandlers();
  UNPROTECT(1);
  return seqlengths;
}
bigWig_t * bigwig_load(const char * filename, const char * udc_dir) { bigWig_t * bigwig = NULL; struct errCatch * err; /* set cache */ if (udc_dir != NULL) udcSetDefaultDir((char*) udc_dir); /* setup error management & try to open file */ err = errCatchNew(); if (errCatchStart(err)) bigwig = bigWigFileOpen((char*)filename); errCatchEnd(err); if (err->gotError) { fprintf(stderr, "error: %s\n", err->message->string); errCatchFree(&err); return NULL; } errCatchFree(&err); return bigwig; }
char *printBigWigViewInfo(FILE *f, char *indent, struct view *view, struct composite *comp, struct taggedFile *tfList) /* Print out info for a bigWig view, including subtracks. */ { /* Look at all tracks in this view and calculate overall limits. */ double sumOfSums = 0, sumOfSumSquares = 0; bits64 sumOfN = 0; struct taggedFile *tf; for (tf = tfList; tf != NULL; tf = tf->next) { if (sameString(tf->manifest->outputType, view->name)) { char *relativeName = tf->manifest->fileName; char *path = relativeName; struct bbiFile *bbi = bigWigFileOpen(path); struct bbiSummaryElement sum = bbiTotalSummary(bbi); sumOfSums += sum.sumData; sumOfSumSquares += sum.sumSquares; sumOfN = sum.validCount; bigWigFileClose(&bbi); } } double mean = sumOfSums/sumOfN; double std = calcStdFromSums(sumOfSums, sumOfSumSquares, sumOfN); double clipMax = mean + 6*std; /* Output view stanza. */ char type[64]; safef(type, sizeof(type), "bigWig %g %g", 0.0, clipMax); fprintf(f, "%stype %s\n", indent, type); fprintf(f, "%sviewLimits 0:%g\n", indent, clipMax); fprintf(f, "%sminLimit 0\n", indent); fprintf(f, "%smaxLimit %g\n", indent, clipMax); fprintf(f, "%sautoScale off\n", indent); fprintf(f, "%smaxHeightPixels 100:32:16\n", indent); fprintf(f, "%swindowingFunction mean+whiskers\n", indent); return cloneString(type); }
void bigWigToWig(char *inFile, char *outFile) /* bigWigToWig - Convert bigWig to wig. This will keep more of the same structure of the * original wig than bigWigToBedGraph does, but still will break up large stepped sections into * smaller ones. */ { struct bbiFile *bwf = bigWigFileOpen(inFile); FILE *f = mustOpen(outFile, "w"); struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { if (clChrom != NULL && !sameString(clChrom, chrom->name)) continue; char *chromName = chrom->name; int start = 0, end = chrom->size; if (clStart > 0) start = clStart; if (clEnd > 0) end = clEnd; bigWigIntervalDump(bwf, chromName, start, end, 0, f); } bbiChromInfoFreeList(&chromList); carefulClose(&f); bbiFileClose(&bwf); }
/* This old way is ~3 times as slow */ void doEnrichmentsFromBigWig(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf, struct cdwAssembly *assembly, struct target *targetList) /* Figure out enrichments from a bigBed file. */ { /* Get path to bigBed, open it, and read all chromosomes. */ char *bigWigPath = cdwPathForFileId(conn, ef->id); struct bbiFile *bbi = bigWigFileOpen(bigWigPath); struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi); /* This takes a while, so let's figure out what parts take the time. */ long totalBigQueryTime = 0; long totalOverlapTime = 0; /* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases * for all targets. This is complicated by just wanting to keep one chromosome worth of * bigWig data in memory. Also just for performance we do a lookup of target range tree to * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */ for (chrom = chromList; chrom != NULL; chrom = chrom->next) { /* Get list of intervals in bigWig for this chromosome, and feed it to a rangeTree. 
*/ struct lm *lm = lmInit(0); long startBigQueryTime = clock1000(); struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom->name, 0, chrom->size, lm); long endBigQueryTime = clock1000(); totalBigQueryTime += endBigQueryTime - startBigQueryTime; struct bbiInterval *iv; /* Loop through all targets adding overlaps from ivList */ long startOverlapTime = clock1000(); struct target *target; for (target = targetList; target != NULL; target = target->next) { struct genomeRangeTree *grt = target->grt; struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name); if (targetTree != NULL) { for (iv = ivList; iv != NULL; iv = iv->next) { int overlap = rangeTreeOverlapSize(targetTree, iv->start, iv->end); target->uniqOverlapBases += overlap; target->overlapBases += overlap * iv->val; } } } long endOverlapTime = clock1000(); totalOverlapTime += endOverlapTime - startOverlapTime; lmCleanup(&lm); } verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime); /* Now loop through targets and save enrichment info to database */ struct target *target; for (target = targetList; target != NULL; target = target->next) { struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, target->overlapBases, target->uniqOverlapBases); cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128); cdwQaEnrichFree(&enrich); } bbiChromInfoFreeList(&chromList); bigWigFileClose(&bbi); freez(&bigWigPath); }
struct bbiFile *I_bigWigFileOpen(const char *fileName)
/* Open a bigWig file for callers that hold the name as a const string.
 * Returns whatever bigWigFileOpen() returns.
 *
 * The name used to be strcpy'd into a 256-byte stack buffer first, which
 * overflowed for any path or URL of 256 characters or more.  The name is now
 * passed straight through with the const qualifier cast away.
 * NOTE(review): this relies on bigWigFileOpen() not writing through its
 * argument, which other callers in this file already assume when they pass
 * cast const strings -- confirm against the library. */
{
    return bigWigFileOpen((char *)fileName);
}
/* --- .Call ENTRY POINT --- */
/* Query the bigWig file named by r_filename for the intervals overlapping
 * r_ranges, a list with one IRanges element per chromosome (chromosome names
 * are taken from the list's names attribute).
 * When r_return_list is true the result is built with
 * new_SimpleList("SimpleList", ...) and holds one numeric vector per query
 * range, of length width, zero-filled and overwritten with the values of the
 * intervals that were hit.  Otherwise the result is built with
 * new_RangedData() from one IRanges and one DataFrame per chromosome; the
 * DataFrame carries a "score" column only when r_return_score is true. */
SEXP BWGFile_query(SEXP r_filename, SEXP r_ranges, SEXP r_return_score, SEXP r_return_list) {
  pushRHandlers();
  struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  SEXP chromNames = getAttrib(r_ranges, R_NamesSymbol);
  int nchroms = length(r_ranges);
  Rboolean return_list = asLogical(r_return_list);
  SEXP rangesList, rangesListEls, dataFrameList, dataFrameListEls, ans;
  SEXP numericListEls;
  bool returnScore = asLogical(r_return_score);
  /* mkNamed() is given var_names for a "score" slot, or var_names + 1 (just the
   * "" terminator) when no score is wanted. */
  const char *var_names[] = { "score", "" };
  /* All intervals returned by bigWigIntervalQuery() are allocated from lm. */
  struct lm *lm = lmInit(0);
  struct bbiInterval *hits = NULL;
  struct bbiInterval *qhits = NULL;
  if (return_list) {
    /* One output vector per query range, so count the ranges over all chromosomes. */
    int n_ranges = 0;
    for(int i = 0; i < nchroms; i++) {
      SEXP localRanges = VECTOR_ELT(r_ranges, i);
      n_ranges += get_IRanges_length(localRanges);
    }
    /* 1 PROTECT outstanding in list mode (released after new_SimpleList below). */
    PROTECT(numericListEls = allocVector(VECSXP, n_ranges));
  } else {
    /* 2 PROTECTs outstanding in RangedData mode (released by the final UNPROTECT(4)). */
    PROTECT(rangesListEls = allocVector(VECSXP, nchroms));
    setAttrib(rangesListEls, R_NamesSymbol, chromNames);
    PROTECT(dataFrameListEls = allocVector(VECSXP, nchroms));
    setAttrib(dataFrameListEls, R_NamesSymbol, chromNames);
  }
  /* Next free slot in numericListEls (list mode only). */
  int elt_len = 0;
  for (int i = 0; i < nchroms; i++) {
    SEXP localRanges = VECTOR_ELT(r_ranges, i);
    int nranges = get_IRanges_length(localRanges);
    int *start = INTEGER(get_IRanges_start(localRanges));
    int *width = INTEGER(get_IRanges_width(localRanges));
    for (int j = 0; j < nranges; j++) {
      /* The query is made on [start - 1, start - 1 + width); presumably this
       * converts a 1-based R start to the 0-based start the library expects. */
      struct bbiInterval *queryHits = bigWigIntervalQuery(file, (char *)CHAR(STRING_ELT(chromNames, i)), start[j] - 1, start[j] - 1 + width[j], lm);
      /* List mode: one numeric vector per query range. */
      if (return_list) {
        qhits = queryHits;
        int nqhits = slCount(queryHits);
        SEXP ans_numeric;
        PROTECT(ans_numeric = allocVector(REALSXP, width[j]));
        /* Positions without a hit stay at zero. */
        memset(REAL(ans_numeric), 0, sizeof(double) * width[j]);
        for (int k = 0; k < nqhits; k++, qhits = qhits->next) {
          /* NOTE(review): the index l - start[j] + 1 is only within the vector
           * if each returned interval lies inside the queried range -- confirm
           * that bigWigIntervalQuery() clips its results. */
          for (int l = qhits->start; l < qhits->end; l++)
            REAL(ans_numeric)[(l - start[j] + 1)] = qhits->val;
        }
        SET_VECTOR_ELT(numericListEls, elt_len, ans_numeric);
        elt_len++;
        /* ans_numeric is now held by numericListEls. */
        UNPROTECT(1);
      }
      /* Prepend this range's hits, reversed, to the running list; the list is
       * reversed once more before it is read in the RangedData branch below. */
      slReverse(&queryHits);
      hits = slCat(queryHits, hits);
    }
    /* RangedData mode: turn this chromosome's hits into an IRanges + DataFrame pair. */
    if (!return_list) {
      int nhits = slCount(hits);
      slReverse(&hits);
      SEXP ans_start, ans_width, ans_score, ans_score_l;
      PROTECT(ans_start = allocVector(INTSXP, nhits));
      PROTECT(ans_width = allocVector(INTSXP, nhits));
      if (returnScore) {
        PROTECT(ans_score_l = mkNamed(VECSXP, var_names));
        /* ans_score is stored into the protected list immediately. */
        ans_score = allocVector(REALSXP, nhits);
        SET_VECTOR_ELT(ans_score_l, 0, ans_score);
      } else {
        /* ans_score stays unset here and is never read when returnScore is false. */
        PROTECT(ans_score_l = mkNamed(VECSXP, var_names + 1));
      }
      /* Walking the list advances hits itself, so it is NULL again when the
       * loop over the next chromosome starts. */
      for (int j = 0; j < nhits; j++, hits = hits->next) {
        INTEGER(ans_start)[j] = hits->start + 1;
        INTEGER(ans_width)[j] = hits->end - hits->start;
        if (returnScore)
          REAL(ans_score)[j] = hits->val;
      }
      SET_VECTOR_ELT(rangesListEls, i, new_IRanges("IRanges", ans_start, ans_width, R_NilValue));
      SET_VECTOR_ELT(dataFrameListEls, i, new_DataFrame("DataFrame", ans_score_l, R_NilValue, ScalarInteger(nhits)));
      /* Releases ans_start, ans_width and ans_score_l. */
      UNPROTECT(3);
    }
  }
  bbiFileClose(&file);
  if (return_list) {
    ans = new_SimpleList("SimpleList", numericListEls);
    /* Releases numericListEls. */
    UNPROTECT(1);
  } else {
    PROTECT(dataFrameList = new_SimpleList("SimpleSplitDataFrameList", dataFrameListEls));
    PROTECT(rangesList = new_SimpleList("SimpleRangesList", rangesListEls));
    ans = new_RangedData("RangedData", rangesList, dataFrameList);
    /* Releases rangesListEls, dataFrameListEls, dataFrameList and rangesList. */
    UNPROTECT(4);
  }
  lmCleanup(&lm);
  popRHandlers();
  return ans;
}
void bigWigMerge(int inCount, char *inFiles[], char *outFile) /* bigWigMerge - Merge together multiple bigWigs into a single one.. */ { /* Make a list of open bigWig files. */ struct bbiFile *inFile, *inFileList = NULL; int i; for (i=0; i<inCount; ++i) { if (clInList) { addWigsInFile(inFiles[i], &inFileList); } else { inFile = bigWigFileOpen(inFiles[i]); slAddTail(&inFileList, inFile); } } FILE *f = mustOpen(outFile, "w"); struct bbiChromInfo *chrom, *chromList = getAllChroms(inFileList); verbose(1, "Got %d chromosomes from %d bigWigs\nProcessing", slCount(chromList), slCount(inFileList)); double *mergeBuf = NULL; int mergeBufSize = 0; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { struct lm *lm = lmInit(0); /* Make sure merge buffer is big enough. */ int chromSize = chrom->size; verboseDot(); verbose(2, "Processing %s (%d bases)\n", chrom->name, chromSize); if (chromSize > mergeBufSize) { mergeBufSize = chromSize; freeMem(mergeBuf); mergeBuf = needHugeMem(mergeBufSize * sizeof(double)); } int i; for (i=0; i<chromSize; ++i) mergeBuf[i] = 0.0; /* Loop through each input file grabbing data and merging it in. */ for (inFile = inFileList; inFile != NULL; inFile = inFile->next) { struct bbiInterval *ivList = bigWigIntervalQuery(inFile, chrom->name, 0, chromSize, lm); verbose(3, "Got %d intervals in %s\n", slCount(ivList), inFile->fileName); struct bbiInterval *iv; for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; if (val > clClip) val = clClip; int end = iv->end; for (i=iv->start; i < end; ++i) mergeBuf[i] += val; } } /* Output each range of same values as a bedGraph item */ int sameCount; for (i=0; i<chromSize; i += sameCount) { sameCount = doublesTheSame(mergeBuf+i, chromSize-i); double val = mergeBuf[i] + clAdjust; if (val > clThreshold) fprintf(f, "%s\t%d\t%d\t%g\n", chrom->name, i, i + sameCount, val); } lmCleanup(&lm); } verbose(1, "\n"); carefulClose(&f); }
void doSummaryStatsBigWig(struct sqlConnection *conn) /* Put up page showing summary stats for bigWig track. */ { struct trackDb *track = curTrack; char *table = curTable; char *shortLabel = (track == NULL ? table : track->shortLabel); char *fileName = bigWigFileName(table, conn); long startTime = clock1000(); htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table); if (anySubtrackMerge(database, curTable)) hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this " "page (not implemented yet). Statistics shown here are only for " "the primary table %s (%s).</EM>", shortLabel, table); struct bbiFile *bwf = bigWigFileOpen(fileName); struct region *region, *regionList = getRegions(); double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0; bits64 validCount = 0; if (!anyFilter() && !anyIntersection()) { for (region = regionList; region != NULL; region = region->next) { struct bbiSummaryElement sum; if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end, bigWigIntervalQuery, 1, &sum)) { if (validCount == 0) { minVal = sum.minVal; maxVal = sum.maxVal; } else { if (sum.minVal < minVal) minVal = sum.minVal; if (sum.maxVal > maxVal) maxVal = sum.maxVal; } sumData += sum.sumData; sumSquares += sum.sumSquares; validCount += sum.validCount; } } } else { double ll, ul; enum wigCompare cmp; getWigFilter(database, curTable, &cmp, &ll, &ul); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(0); struct bbiInterval *iv, *ivList; ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm); for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; double size = iv->end - iv->start; if (validCount == 0) minVal = maxVal = val; else { if (val < minVal) minVal = val; if (val > maxVal) maxVal = val; } sumData += size*val; sumSquares += size*val*val; validCount += size; } lmCleanup(&lm); } } hTableStart(); floatStatRow("mean", sumData/validCount); floatStatRow("min", 
minVal); floatStatRow("max", maxVal); floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount)); numberStatRow("bases with data", validCount); long long regionSize = basesInRegion(regionList,0); long long gapTotal = gapsInRegion(conn, regionList,0); numberStatRow("bases with sequence", regionSize - gapTotal); numberStatRow("bases in region", regionSize); wigFilterStatRow(conn); stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off")); long wigFetchTime = clock1000() - startTime; floatStatRow("load and calc time", 0.001*wigFetchTime); hTableEnd(); bbiFileClose(&bwf); htmlClose(); }
int main(int argc, char *argv[]) { /* 1. urlpath bigwig 2. chrom 3. start 4. stop 5. spnum 6. outfile 7. summeth */ if(argc!=8) { fputs("bwquery: wrong arg\n", stderr); return 1; } char *tail=NULL; unsigned int start=strtol(argv[3],&tail,10); if(tail[0]!='\0' || start<0) { fprintf(stderr, "bwquery: wrong start (%s)\n", argv[3]); return 1; } unsigned int stop=strtol(argv[4],&tail,10); if(tail[0]!='\0' || stop<=start) { fprintf(stderr, "bwquery: wrong stop (%s)\n", argv[4]); return 1; } unsigned int spnum=strtol(argv[5],&tail,10); if(tail[0]!='\0' || spnum<=0) { fprintf(stderr, "bwquery: wrong spnum (%s)\n", argv[5]); return 1; } unsigned int summeth=strtol(argv[7],&tail,10); if(tail[0]!='\0' || summeth<1) { fprintf(stderr, "bwquery: wrong summeth (%s)\n", argv[7]); return 1; } double *data=malloc(sizeof(double)*spnum); if(data==NULL) { fputs("bwquery: out of mem\n", stderr); return 1; } int i; for(i=0; i<spnum; i++) data[i]=0; struct bbiFile *bwf = bigWigFileOpen(argv[1]); if(bwf==NULL) { fprintf(stderr, "bwquery: no access to %s\n", argv[1]); return 1; } bbiSummaryArray(bwf, argv[2], start, stop, (BbiFetchIntervals)bigWigIntervalQuery, summeth==1?bbiSumMean: summeth==2?bbiSumMax:bbiSumMin, spnum, &data[0]); bbiFileClose(&bwf); FILE *fout=fopen(argv[6],"w"); if(fout==NULL) { fputs("bwquery: failed to open output file\n", stderr); return 1; } for(i=0; i<spnum; i++) fprintf(fout, "%f\n", data[i]); fclose(fout); return 0; }
void doEnrichmentsFromBigWig(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf, struct cdwAssembly *assembly, struct target *targetList) /* Figure out enrichments from a bigBed file. */ { /* Get path to bigBed, open it, and read all chromosomes. */ char *bigWigPath = cdwPathForFileId(conn, ef->id); struct bbiFile *bbi = bigWigFileOpen(bigWigPath); struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi); struct bigWigValsOnChrom *valsOnChrom = bigWigValsOnChromNew(); /* This takes a while, so let's figure out what parts take the time. */ long totalBigQueryTime = 0; long totalOverlapTime = 0; /* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases * for all targets. This is complicated by just wanting to keep one chromosome worth of * bigWig data in memory. Also just for performance we do a lookup of target range tree to * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */ for (chrom = chromList; chrom != NULL; chrom = chrom->next) { long startBigQueryTime = clock1000(); boolean gotData = bigWigValsOnChromFetchData(valsOnChrom, chrom->name, bbi); long endBigQueryTime = clock1000(); totalBigQueryTime += endBigQueryTime - startBigQueryTime; if (gotData) { double *valBuf = valsOnChrom->valBuf; Bits *covBuf = valsOnChrom->covBuf; /* Loop through all targets adding overlaps from ivList */ long startOverlapTime = clock1000(); struct target *target; for (target = targetList; target != NULL; target = target->next) { if (target->skip) continue; struct genomeRangeTree *grt = target->grt; struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name); if (targetTree != NULL) { struct range *range, *rangeList = rangeTreeList(targetTree); for (range = rangeList; range != NULL; range = range->next) { int s = range->start, e = range->end, i; for (i=s; i<=e; ++i) { if (bitReadOne(covBuf, i)) { double x = valBuf[i]; target->uniqOverlapBases += 1; target->overlapBases += x; } } 
} } } long endOverlapTime = clock1000(); totalOverlapTime += endOverlapTime - startOverlapTime; } } verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime); /* Now loop through targets and save enrichment info to database */ struct target *target; for (target = targetList; target != NULL; target = target->next) { if (target->skip) continue; struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, target->overlapBases, target->uniqOverlapBases); cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128); cdwQaEnrichFree(&enrich); } bigWigValsOnChromFree(&valsOnChrom); bbiChromInfoFreeList(&chromList); bigWigFileClose(&bbi); freez(&bigWigPath); }