int bigWigIntervalDump(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, int maxCount,
        FILE *out)
/* Write to out the bigWig items that overlap chrom:start-end.  Pass 0 for maxCount
 * to place no limit on how many are written.  Returns the number written. */
{
    struct fileOffsetSize *blocks, *cur;
    struct udcFile *file;
    int total = 0;
    int remaining = maxCount;

    if (bwf->typeSig != bigWigSig)
        errAbort("Trying to do bigWigIntervalDump on a non big-wig file.");
    bbiAttachUnzoomedCir(bwf);

    blocks = bbiOverlappingBlocks(bwf, bwf->unzoomedCir, chrom, start, end, NULL);
    file = bwf->udc;

    cur = blocks;
    while (cur != NULL) {
        /* Position the file at this block before the block dumper is called. */
        udcSeek(file, cur->offset);
        int dumped = bigWigBlockDumpIntersectingRange(bwf, chrom, start, end, remaining, out);
        total += dumped;

        /* A limit of 0 means "unlimited", so only do the bookkeeping otherwise. */
        if (remaining != 0) {
            if (dumped >= remaining)
                break;
            remaining -= dumped;
        }
        cur = cur->next;
    }

    slFreeList(&blocks);
    return total;
}
/* Hand the data blocks overlapping chrom:start-finish to downloadBlockRun(),
 * one run of contiguous blocks at a time, with each run capped at MAX_BLOCKS
 * blocks.  Records chrom in data->chrom before doing anything else.
 *
 * Returns true as soon as downloadBlockRun() returns true for a run (the
 * remaining runs are then skipped), false once every run has been processed.
 * The block list is released on both paths. */
static bool downloadBigRegion(BigFileReaderData * data, char * chrom, int start, int finish) {
	struct fileOffsetSize *blockList, *block, *beforeGap, *afterGap;
	bool stopped = false;

	data->chrom = chrom;
	blockList = bbiOverlappingBlocks(data->bwf, data->bwf->unzoomedCir, chrom, start, finish, NULL);

	for (block = blockList; block; block = afterGap) {
		/* Find the end of the contiguous run that starts at block. */
		fileOffsetSizeFindGap(block, &beforeGap, &afterGap);

		/* Little hack to limit the number of blocks read at any time:
		 * walk at most MAX_BLOCKS blocks into the run and cut it there. */
		struct fileOffsetSize *blockPtr, *prevBlock = block;
		int blockCounter = 0;
		for (blockPtr = block; blockPtr != afterGap && blockCounter < MAX_BLOCKS; blockPtr = blockPtr->next) {
			blockCounter++;
			prevBlock = blockPtr;
		}
		if (blockCounter == MAX_BLOCKS) {
			beforeGap = prevBlock;
			afterGap = blockPtr;
		}

		/* Byte span from the start of the first block to the end of the last. */
		bits64 mergedSize = beforeGap->offset + beforeGap->size - block->offset;
		if (downloadBlockRun(data, chrom, block, afterGap, mergedSize)) {
			stopped = true;
			break;
		}
	}

	/* slFreeList() expects the ADDRESS of the list pointer.  Passing the
	 * list pointer itself would skip (leak) the first node.  With the
	 * address form an empty list needs no special-casing. */
	slFreeList(&blockList);
	return stopped;
}
int bigWigIntervalDump(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, int maxCount,
        FILE *out)
/* Print out info on bigWig parts that intersect chrom:start-end.  Set maxCount to 0 if you
 * don't care how many are printed.  Returns number printed. */
{
    if (bwf->typeSig != bigWigSig)
        errAbort("Trying to do bigWigIntervalDump on a non big-wig file.");
    bbiAttachUnzoomedCir(bwf);
    struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
            chrom, start, end, NULL);
    struct fileOffsetSize *block, *beforeGap, *afterGap;
    struct udcFile *udc = bwf->udc;
    int printCount = 0;
    int reachedMax = 0;    /* Set once maxCount items have been printed. */

    /* Set up for uncompression optionally. */
    char *uncompressBuf = NULL;
    if (bwf->uncompressBufSize > 0)
        uncompressBuf = needLargeMem(bwf->uncompressBufSize);

    /* This loop is a little complicated because we merge the read requests for efficiency, but we
     * have to then go back through the data one unmerged block at a time.  The outer loop has no
     * increment of its own: the inner loop advances block to afterGap.  That is why reaching the
     * limit must end the outer loop explicitly - otherwise the same block would be read and
     * dumped again forever. */
    for (block = blockList; block != NULL && !reachedMax; )
        {
        /* Find contiguous blocks and read them into mergedBuf. */
        fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
        bits64 mergedOffset = block->offset;
        bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
        udcSeek(udc, mergedOffset);
        char *mergedBuf = needLargeMem(mergedSize);
        udcMustRead(udc, mergedBuf, mergedSize);
        char *blockBuf = mergedBuf;

        /* Loop through individual blocks within merged section. */
        for (; block != afterGap; block = block->next)
            {
            /* Uncompress if necessary. */
            char *blockPt, *blockEnd;
            if (uncompressBuf)
                {
                blockPt = uncompressBuf;
                int uncSize = zUncompress(blockBuf, block->size, uncompressBuf,
                        bwf->uncompressBufSize);
                blockEnd = blockPt + uncSize;
                }
            else
                {
                blockPt = blockBuf;
                blockEnd = blockPt + block->size;
                }

            /* Do the actual dump. */
            int oneCount = bigWigBlockDumpIntersectingRange(bwf->isSwapped, blockPt, blockEnd,
                    chrom, start, end, maxCount, out);

            /* Keep track of how many dumped, not exceeding maximum. */
            printCount += oneCount;
            if (maxCount != 0)
                {
                if (oneCount >= maxCount)
                    {
                    reachedMax = 1;
                    break;
                    }
                maxCount -= oneCount;
                }
            blockBuf += block->size;
            }
        /* Always release the merged buffer, including when the limit was just reached. */
        freeMem(mergedBuf);
        }
    freeMem(uncompressBuf);

    slFreeList(&blockList);
    return printCount;
}
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
        struct lm *lm)
/* Collect the bigWig items overlapping chrom:start-end, each one clipped to that
 * range.  The returned list is allocated out of lm. */
{
    if (bwf->typeSig != bigWigSig)
        errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
    bbiAttachUnzoomedCir(bwf);

    struct bbiInterval *item, *results = NULL;
    struct fileOffsetSize *blocks = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
            chrom, start, end, NULL);
    struct fileOffsetSize *cur, *runLast, *runStop;
    struct udcFile *file = bwf->udc;
    boolean swapped = bwf->isSwapped;

    /* Scratch buffer for uncompressing; stays NULL when uncompressBufSize is 0. */
    char *inflated = NULL;
    if (bwf->uncompressBufSize > 0)
        inflated = needLargeMem(bwf->uncompressBufSize);

    /* Each pass reads one run of contiguous blocks with a single request and then
     * walks that run block by block.  The inner loop is what advances cur. */
    cur = blocks;
    while (cur != NULL) {
        fileOffsetSizeFindGap(cur, &runLast, &runStop);
        bits64 runOffset = cur->offset;
        bits64 runSize = runLast->offset + runLast->size - runOffset;
        udcSeek(file, runOffset);
        char *runBuf = needLargeMem(runSize);
        udcMustRead(file, runBuf, runSize);

        char *raw = runBuf;    /* Start of the current block inside runBuf. */
        while (cur != runStop) {
            /* Point pt/ptEnd at the block's contents, uncompressed if necessary. */
            char *pt, *ptEnd;
            if (inflated != NULL) {
                int inflatedSize = zUncompress(raw, cur->size, inflated,
                        bwf->uncompressBufSize);
                pt = inflated;
                ptEnd = pt + inflatedSize;
            } else {
                pt = raw;
                ptEnd = pt + cur->size;
            }

            /* Parse the section header, then the items in the layout it names. */
            struct bwgSectionHead head;
            bwgSectionHeadFromMem(&pt, &head, swapped);
            switch (head.type) {
            case bwgTypeBedGraph:
                /* Every item carries its own start, end and value. */
                for (int n = 0; n < head.itemCount; ++n) {
                    bits32 lo = memReadBits32(&pt, swapped);
                    bits32 hi = memReadBits32(&pt, swapped);
                    float value = memReadFloat(&pt, swapped);
                    lo = (lo < start) ? start : lo;
                    hi = (hi > end) ? end : hi;
                    if (lo >= hi)
                        continue;    /* Nothing left after clipping. */
                    lmAllocVar(lm, item);
                    item->start = lo;
                    item->end = hi;
                    item->val = value;
                    slAddHead(&results, item);
                }
                break;
            case bwgTypeVariableStep:
                /* Every item carries a start and value; the span comes from the header. */
                for (int n = 0; n < head.itemCount; ++n) {
                    bits32 lo = memReadBits32(&pt, swapped);
                    bits32 hi = lo + head.itemSpan;
                    float value = memReadFloat(&pt, swapped);
                    lo = (lo < start) ? start : lo;
                    hi = (hi > end) ? end : hi;
                    if (lo >= hi)
                        continue;
                    lmAllocVar(lm, item);
                    item->start = lo;
                    item->end = hi;
                    item->val = value;
                    slAddHead(&results, item);
                }
                break;
            case bwgTypeFixedStep: {
                /* Items carry only a value; positions advance by the header's step. */
                bits32 itemStart = head.start;
                bits32 itemEnd = itemStart + head.itemSpan;
                for (int n = 0; n < head.itemCount; ++n) {
                    float value = memReadFloat(&pt, swapped);
                    bits32 lo = (itemStart < start) ? start : itemStart;
                    bits32 hi = (itemEnd > end) ? end : itemEnd;
                    if (lo < hi) {
                        lmAllocVar(lm, item);
                        item->start = lo;
                        item->end = hi;
                        item->val = value;
                        slAddHead(&results, item);
                    }
                    itemStart += head.itemStep;
                    itemEnd += head.itemStep;
                }
                break;
            }
            default:
                internalErr();
                break;
            }
            /* The items should have consumed the block exactly. */
            assert(pt == ptEnd);

            raw += cur->size;
            cur = cur->next;
        }
        freeMem(runBuf);
    }
    freeMem(inflated);
    slFreeList(&blocks);

    /* Items were pushed onto the head of the list, so flip it before returning. */
    slReverse(&results);
    return results;
}
static void fetchIntoBuf(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
        struct bigWigValsOnChrom *chromVals)
/* Read the bigWig blocks overlapping chrom:start-end and record their items in
 * chromVals: every covered base gets its bit set in chromVals->covBuf and its
 * value stored in chromVals->valBuf.  Item coordinates are used directly as
 * indices into those buffers; no clipping to start-end is done here. */
{
    /* Much of this mirrors bigWigIntervalQuery.  Here there is no clipping and the
     * output is different.  Both sit in speed-critical inner loops, so the
     * duplication is kept rather than risk slowing them down by factoring it out. */
    if (bwf->typeSig != bigWigSig)
        errAbort("Trying to do fetchIntoBuf on a non big-wig file.");
    bbiAttachUnzoomedCir(bwf);

    struct fileOffsetSize *blocks = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
            chrom, start, end, NULL);
    struct fileOffsetSize *cur, *runLast, *runStop;
    struct udcFile *file = bwf->udc;
    boolean swapped = bwf->isSwapped;
    Bits *covered = chromVals->covBuf;
    double *values = chromVals->valBuf;

    /* Scratch buffer for uncompressing; stays NULL when uncompressBufSize is 0. */
    char *inflated = NULL;
    if (bwf->uncompressBufSize > 0)
        inflated = needLargeMem(bwf->uncompressBufSize);

    /* Each pass reads one run of contiguous blocks with a single request and then
     * walks that run block by block.  The inner loop is what advances cur. */
    cur = blocks;
    while (cur != NULL) {
        fileOffsetSizeFindGap(cur, &runLast, &runStop);
        bits64 runOffset = cur->offset;
        bits64 runSize = runLast->offset + runLast->size - runOffset;
        udcSeek(file, runOffset);
        char *runBuf = needLargeMem(runSize);
        udcMustRead(file, runBuf, runSize);

        char *raw = runBuf;    /* Start of the current block inside runBuf. */
        while (cur != runStop) {
            /* Point pt/ptEnd at the block's contents, uncompressed if necessary. */
            char *pt, *ptEnd;
            if (inflated != NULL) {
                int inflatedSize = zUncompress(raw, cur->size, inflated,
                        bwf->uncompressBufSize);
                pt = inflated;
                ptEnd = pt + inflatedSize;
            } else {
                pt = raw;
                ptEnd = pt + cur->size;
            }

            /* Parse the section header, then the items in the layout it names. */
            struct bwgSectionHead head;
            bwgSectionHeadFromMem(&pt, &head, swapped);
            switch (head.type) {
            case bwgTypeBedGraph:
                /* Every item carries its own start, end and value. */
                for (int n = 0; n < head.itemCount; ++n) {
                    bits32 from = memReadBits32(&pt, swapped);
                    bits32 to = memReadBits32(&pt, swapped);
                    bitSetRange(covered, from, to - from);
                    float value = memReadFloat(&pt, swapped);
                    bits32 pos = from;
                    while (pos < to)
                        values[pos++] = value;
                }
                break;
            case bwgTypeVariableStep:
                /* Every item carries a start and value; the span comes from the header. */
                for (int n = 0; n < head.itemCount; ++n) {
                    bits32 from = memReadBits32(&pt, swapped);
                    float value = memReadFloat(&pt, swapped);
                    bitSetRange(covered, from, head.itemSpan);
                    bits32 to = from + head.itemSpan;
                    bits32 pos = from;
                    while (pos < to)
                        values[pos++] = value;
                }
                break;
            case bwgTypeFixedStep:
                if (head.itemStep == 1 && head.itemSpan == 1) {
                    /* Most common and worst case: one value per base across
                     * head.start..head.end, so mark the whole range in one call. */
                    bits32 from = head.start;
                    bits32 to = head.end;
                    bitSetRange(covered, from, to - from);
                    for (bits32 pos = from; pos < to; ++pos)
                        values[pos] = memReadFloat(&pt, swapped);
                } else {
                    /* General case: items carry only a value; positions advance by
                     * the header's step. */
                    bits32 from = head.start;
                    bits32 to = from + head.itemSpan;
                    for (int n = 0; n < head.itemCount; ++n) {
                        bitSetRange(covered, from, head.itemSpan);
                        float value = memReadFloat(&pt, swapped);
                        for (bits32 pos = from; pos < to; ++pos)
                            values[pos] = value;
                        from += head.itemStep;
                        to += head.itemStep;
                    }
                }
                break;
            default:
                internalErr();
                break;
            }
            /* The items should have consumed the block exactly. */
            assert(pt == ptEnd);

            raw += cur->size;
            cur = cur->next;
        }
        freeMem(runBuf);
    }
    freeMem(inflated);
    slFreeList(&blocks);
}
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom,
        bits32 start, bits32 end, int maxItems, struct lm *lm)
/* Get data for interval.  Return list allocated out of lm.  Set maxItems to maximum
 * number of items to return, or to 0 for all items. */
{
    struct bigBedInterval *el, *list = NULL;
    int itemCount = 0;
    int limitHit = 0;    /* Set once more than maxItems matching records were seen. */
    bbiAttachUnzoomedCir(bbi);
    bits32 chromId;
    struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir,
            chrom, start, end, &chromId);
    struct fileOffsetSize *block, *beforeGap, *afterGap;
    struct udcFile *udc = bbi->udc;
    boolean isSwapped = bbi->isSwapped;
    struct dyString *dy = dyStringNew(32);

    /* Set up for uncompression optionally. */
    char *uncompressBuf = NULL;
    if (bbi->uncompressBufSize > 0)
        uncompressBuf = needLargeMem(bbi->uncompressBufSize);

    /* Read requests for contiguous blocks are merged, then the merged data is walked one
     * block at a time.  The inner loop is what advances block. */
    for (block = blockList; block != NULL; )
        {
        /* Find contiguous blocks and read them into mergedBuf. */
        fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
        bits64 mergedOffset = block->offset;
        bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
        udcSeek(udc, mergedOffset);
        char *mergedBuf = needLargeMem(mergedSize);
        udcMustRead(udc, mergedBuf, mergedSize);
        char *blockBuf = mergedBuf;

        /* Loop through individual blocks within merged section. */
        for (; block != afterGap; block = block->next)
            {
            /* Uncompress if necessary. */
            char *blockPt, *blockEnd;
            if (uncompressBuf)
                {
                blockPt = uncompressBuf;
                int uncSize = zUncompress(blockBuf, block->size, uncompressBuf,
                        bbi->uncompressBufSize);
                blockEnd = blockPt + uncSize;
                }
            else
                {
                blockPt = blockBuf;
                blockEnd = blockPt + block->size;
                }

            while (blockPt < blockEnd)
                {
                /* Read next record into local variables. */
                bits32 chr = memReadBits32(&blockPt, isSwapped);
                bits32 s = memReadBits32(&blockPt, isSwapped);
                bits32 e = memReadBits32(&blockPt, isSwapped);

                /* Copy the zero-terminated rest of the record.  Stop only at the
                 * terminator or at the end of the block.  Testing the byte's sign would
                 * end the copy early on bytes >= 0x80 where plain char is signed, leaving
                 * blockPt in the middle of the string and garbling later records. */
                dyStringClear(dy);
                while (blockPt < blockEnd)
                    {
                    char c = *blockPt++;
                    if (c == 0)
                        break;
                    dyStringAppendC(dy, c);
                    }

                /* If we're actually in range then copy it into a new element and add to list. */
                if (chr == chromId && s < end && e > start)
                    {
                    ++itemCount;
                    if (maxItems > 0 && itemCount > maxItems)
                        {
                        limitHit = 1;
                        break;
                        }
                    lmAllocVar(lm, el);
                    el->start = s;
                    el->end = e;
                    if (dy->stringSize > 0)
                        el->rest = lmCloneString(lm, dy->string);
                    el->chromId = chromId;
                    slAddHead(&list, el);
                    }
                }
            if (limitHit)
                break;
            blockBuf += block->size;
            }

        /* Free before leaving the loop so mergedBuf is not leaked when the item limit
         * cuts the query short. */
        freez(&mergedBuf);
        if (limitHit)
            break;
        }
    freeMem(uncompressBuf);
    dyStringFree(&dy);
    slFreeList(&blockList);

    /* Elements were added at the head, so reverse the list before returning. */
    slReverse(&list);
    return list;
}
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
        struct lm *lm)
/* Get data for interval.  Return list allocated out of lm.  Each returned item is
 * clipped to start-end. */
{
    if (bwf->typeSig != bigWigSig)
        errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
    bbiAttachUnzoomedCir(bwf);
    struct bbiInterval *el, *list = NULL;
    struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
            chrom, start, end, NULL);
    struct fileOffsetSize *block;
    struct udcFile *udc = bwf->udc;
    boolean isSwapped = bwf->isSwapped;
    float val;
    int i;

    /* NOTE(review): blockList is merged without being sorted first; presumably
     * bbiOverlappingBlocks already returns it in file order - confirm. */
    struct fileOffsetSize *mergedBlocks = fileOffsetSizeMerge(blockList);
    for (block = mergedBlocks; block != NULL; block = block->next)
        {
        udcSeek(udc, block->offset);
        char *blockBuf = needLargeMem(block->size);
        /* Use the aborting read so that a short read is not silently parsed as if
         * the whole buffer had been filled. */
        udcMustRead(udc, blockBuf, block->size);
        char *blockPt = blockBuf, *blockEnd = blockBuf + block->size;

        /* Keep parsing sections until the merged buffer is used up. */
        while (blockPt < blockEnd)
            {
            struct bwgSectionHead head;
            bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
            switch (head.type)
                {
                case bwgTypeBedGraph:
                    {
                    /* Every item carries its own start, end and value. */
                    for (i = 0; i < head.itemCount; ++i)
                        {
                        bits32 s = memReadBits32(&blockPt, isSwapped);
                        bits32 e = memReadBits32(&blockPt, isSwapped);
                        val = memReadFloat(&blockPt, isSwapped);
                        if (s < start) s = start;
                        if (e > end) e = end;
                        if (s < e)
                            {
                            lmAllocVar(lm, el);
                            el->start = s;
                            el->end = e;
                            el->val = val;
                            slAddHead(&list, el);
                            }
                        }
                    break;
                    }
                case bwgTypeVariableStep:
                    {
                    /* Every item carries a start and value; span comes from the header. */
                    for (i = 0; i < head.itemCount; ++i)
                        {
                        bits32 s = memReadBits32(&blockPt, isSwapped);
                        bits32 e = s + head.itemSpan;
                        val = memReadFloat(&blockPt, isSwapped);
                        if (s < start) s = start;
                        if (e > end) e = end;
                        if (s < e)
                            {
                            lmAllocVar(lm, el);
                            el->start = s;
                            el->end = e;
                            el->val = val;
                            slAddHead(&list, el);
                            }
                        }
                    break;
                    }
                case bwgTypeFixedStep:
                    {
                    /* Items carry only a value; positions advance by the header's step. */
                    bits32 s = head.start;
                    bits32 e = s + head.itemSpan;
                    for (i = 0; i < head.itemCount; ++i)
                        {
                        val = memReadFloat(&blockPt, isSwapped);
                        bits32 clippedS = s, clippedE = e;
                        if (clippedS < start) clippedS = start;
                        if (clippedE > end) clippedE = end;
                        if (clippedS < clippedE)
                            {
                            lmAllocVar(lm, el);
                            el->start = clippedS;
                            el->end = clippedE;
                            el->val = val;
                            slAddHead(&list, el);
                            }
                        s += head.itemStep;
                        e += head.itemStep;
                        }
                    break;
                    }
                default:
                    internalErr();
                    break;
                }
            }
        /* Release this merged block's buffer; without this, one buffer is leaked
         * per merged block. */
        freeMem(blockBuf);
        }
    slFreeList(&mergedBlocks);
    slFreeList(&blockList);

    /* Elements were added at the head, so reverse the list before returning. */
    slReverse(&list);
    return list;
}