struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName, int *retFieldIx) /* Return index associated with fieldName. Aborts if no such index. Optionally return * index in a row of this field. */ { struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; struct asObject *as = bigBedAsOrDefault(bbi); struct asColumn *col = asColumnFind(as, fieldName); if (col == NULL) errAbort("No field %s in %s", fieldName, bbi->fileName); int colIx = slIxFromElement(as->columnList, col); if (retFieldIx != NULL) *retFieldIx = colIx; asObjectFree(&as); /* See if we have any extra indexes, and if so seek to there. */ bits64 offset = bbi->extraIndexListOffset; if (offset == 0) errAbort("%s has no indexes", bbi->fileName); udcSeek(udc, offset); /* Go through each extra index and see if it's a match */ int i; for (i=0; i<bbi->extraIndexCount; ++i) { bits16 type = udcReadBits16(udc, isSwapped); bits16 fieldCount = udcReadBits16(udc, isSwapped); bits64 fileOffset = udcReadBits64(udc, isSwapped); udcSeekCur(udc, 4); // skip over reserved bits if (type != 0) { warn("Don't understand type %d", type); internalErr(); } if (fieldCount == 1) { bits16 fieldId = udcReadBits16(udc, isSwapped); udcSeekCur(udc, 2); // skip over reserved bits if (fieldId == colIx) { udcSeek(udc, fileOffset); struct bptFile *bpt = bptFileAttach(bbi->fileName, udc); return bpt; } } else { warn("Not yet understanding indexes on multiple fields at once."); internalErr(); } } errAbort("%s is not indexed in %s", fieldName, bbi->fileName); return NULL; }
static void getFloatArray(struct annoStreamWig *self, struct wiggle *wiggle,
	boolean *retRightFail, int *retValidCount, float *vector)
/* Read the wiggle->count packed bytes for one wiggle row from the wib file and
 * expand them into vector: each byte becomes one float that is written
 * wiggle->span times in a row.  Bytes equal to WIG_NO_DATA, and values rejected
 * by the streamer's filters, are stored as NAN.  If retValidCount is non-NULL it
 * receives the number of bytes whose value was kept.  Aborts on a short read. */
{
udcSeek(self->wibFH, wiggle->offset);
UBYTE packed[wiggle->count];
size_t wanted = sizeof(packed);
size_t got = udcRead(self->wibFH, packed, wanted);
if (got != wanted)
    errnoAbort("annoStreamWig: failed to udcRead %llu bytes from %s (got %llu)\n",
	       (unsigned long long)wanted, wiggle->file, (unsigned long long)got);
paranoidCheckSize(self, wiggle);

int kept = 0;
int byteIx;
for (byteIx = 0;  byteIx < wiggle->count;  byteIx++)
    {
    float value = NAN;
    if (packed[byteIx] != WIG_NO_DATA)
        {
        value = BIN_TO_VALUE(packed[byteIx], wiggle->lowerLimit, wiggle->dataRange);
        if (annoFilterWigValueFails(self->streamer.filters, value, retRightFail))
            value = NAN;
        else
            kept++;
        }
    /* Replicate the value across the span of bases this byte covers. */
    int firstBp = byteIx * wiggle->span;
    int rep;
    for (rep = 0;  rep < wiggle->span;  rep++)
        vector[firstBp + rep] = value;
    }

if (retValidCount != NULL)
    *retValidCount = kept;
}
int bigWigIntervalDump(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, int maxCount,
	FILE *out)
/* Write to out the bigWig items that intersect chrom:start-end, visiting the
 * overlapping blocks one at a time.  A maxCount of 0 means no limit; otherwise
 * dumping stops once a block has used up the remaining allowance.  Returns the
 * number of items printed.  Aborts if bwf is not a bigWig file. */
{
if (bwf->typeSig != bigWigSig)
    errAbort("Trying to do bigWigIntervalDump on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);

struct fileOffsetSize *blocks = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
	chrom, start, end, NULL);
struct udcFile *f = bwf->udc;
int total = 0;

struct fileOffsetSize *cur = blocks;
while (cur != NULL)
    {
    udcSeek(f, cur->offset);
    int dumped = bigWigBlockDumpIntersectingRange(bwf, chrom, start, end, maxCount, out);
    total += dumped;
    if (maxCount != 0)
        {
        if (dumped >= maxCount)
            break;
        maxCount -= dumped;    /* what is left for the following blocks */
        }
    cur = cur->next;
    }

slFreeList(&blocks);
return total;
}
static bits64 bptDataStart(struct bptFile *bpt) /* Return offset of first bit of data (as opposed to index) in file. In hind sight I wish * this were stored in the header, but fortunately it's not that hard to compute. */ { bits64 offset = bpt->rootOffset; for (;;) { /* Seek to block start */ udcSeek(bpt->udc, offset); /* Read block header, break if we are leaf. */ UBYTE isLeaf; UBYTE reserved; bits16 childCount; udcMustReadOne(bpt->udc, isLeaf); if (isLeaf) break; udcMustReadOne(bpt->udc, reserved); boolean isSwapped = bpt->isSwapped; childCount = udcReadBits16(bpt->udc, isSwapped); /* Read and discard first key. */ char keyBuf[bpt->keySize]; udcMustRead(bpt->udc, keyBuf, bpt->keySize); /* Get file offset of sub-block. */ offset = udcReadBits64(bpt->udc, isSwapped); } return offset; }
void bptKeyAtPos(struct bptFile *bpt, bits64 itemPos, void *result)
/* Copy the key of the item at position itemPos into result (the first item is at
 * position 0).  Exactly bpt->keySize bytes are written, so result must be at
 * least that large, and no terminating zero is added.  For string keys use
 * bptStringKeyAtPos instead. */
{
udcSeek(bpt->udc, bptDataOffset(bpt, itemPos));
udcMustRead(bpt->udc, result, bpt->keySize);
}
void bbiAttachUnzoomedCir(struct bbiFile *bbi)
/* Attach the unzoomed cirTree index to bbi if that has not been done already.
 * Calling this when the index is already attached does nothing. */
{
if (bbi->unzoomedCir != NULL)
    return;
udcSeek(bbi->udc, bbi->unzoomedIndexOffset);
bbi->unzoomedCir = cirTreeFileAttach(bbi->fileName, bbi->udc);
}
char *bigBedAutoSqlText(struct bbiFile *bbi)
/* Return the autoSql text stored in the file, or NULL when the header's asOffset
 * is zero.  The caller should freeMem the returned string when done. */
{
char *text = NULL;
if (bbi->asOffset != 0)
    {
    udcSeek(bbi->udc, bbi->asOffset);
    text = udcReadStringAndZero(bbi->udc);
    }
return text;
}
struct slName *bigBedListExtraIndexes(struct bbiFile *bbi) /* Return list of names of extra indexes beyond primary chrom:start-end one" */ { struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; /* See if we have any extra indexes, and if so seek to there. */ bits64 offset = bbi->extraIndexListOffset; if (offset == 0) return NULL; udcSeek(udc, offset); /* Construct list of field that are being indexed. List is list of * field numbers within asObj. */ int i; struct slInt *intList = NULL, *intEl; for (i=0; i<bbi->extraIndexCount; ++i) { bits16 type,fieldCount; type = udcReadBits16(udc, isSwapped); fieldCount = udcReadBits16(udc, isSwapped); udcSeekCur(udc, sizeof(bits64)); // skip over fileOffset udcSeekCur(udc, 4); // skip over reserved bits if (fieldCount == 1) { bits16 fieldId = udcReadBits16(udc, isSwapped); udcSeekCur(udc, 2); // skip over reserved bits intEl = slIntNew(fieldId); slAddHead(&intList, intEl); } else { warn("Not yet understanding indexes on multiple fields at once."); internalErr(); } } /* Now have to make an asObject to find out name that corresponds to this field. */ struct asObject *as = bigBedAsOrDefault(bbi); /* Make list of field names out of list of field numbers */ struct slName *nameList = NULL; for (intEl = intList; intEl != NULL; intEl = intEl->next) { struct asColumn *col = slElementFromIx(as->columnList, intEl->val); if (col == NULL) { warn("Inconsistent bigBed file %s", bbi->fileName); internalErr(); } slNameAddHead(&nameList, col->name); } asObjectFree(&as); return nameList; }
struct crTreeFile *crTreeFileOpen(char *fileName) /* Open up r-tree index file - reading headers and verifying things. */ { /* Open file and allocate structure to hold info from header etc. */ struct udcFile *udc = udcFileOpen(fileName, udcDefaultDir()); struct crTreeFile *crt = needMem(sizeof(*crt)); fileName = crt->fileName = cloneString(fileName); crt->udc = udc; /* Read magic number at head of file and use it to see if we are proper file type, and * see if we are byte-swapped. */ bits32 magic; boolean isSwapped = FALSE; udcMustReadOne(udc, magic); if (magic != crTreeSig) { magic = byteSwap32(magic); isSwapped = crt->isSwapped = TRUE; if (magic != crTreeSig) errAbort("%s is not a chromosome r-tree index file", fileName); } /* Read rest of high level header including notably the offsets to the * chromosome and range indexes. */ bits32 reserved32; udcMustReadOne(udc, reserved32); crt->chromOffset = udcReadBits64(udc, isSwapped); crt->cirOffset = udcReadBits64(udc, isSwapped); /* Read in the chromosome index header. */ udcSeek(udc, crt->chromOffset); crt->chromBpt = bptFileAttach(fileName, udc); /* Read in range index header. */ udcSeek(udc, crt->cirOffset); crt->cir = cirTreeFileAttach(fileName, udc); return crt; }
static boolean rFind(struct bptFile *bpt, bits64 blockStart, void *key, void *val)
/* Look for key in the subtree whose block starts at blockStart.  On a match the
 * value is left in the memory val points to and TRUE is returned, otherwise
 * FALSE.  Note that val is used as the read buffer for every leaf item examined,
 * so it is overwritten even when nothing matches. */
{
udcSeek(bpt->udc, blockStart);

/* Block header: leaf flag, a reserved byte, then the child count. */
UBYTE leafFlag;
UBYTE pad;
udcMustReadOne(bpt->udc, leafFlag);
udcMustReadOne(bpt->udc, pad);
boolean swapped = bpt->isSwapped;
bits16 kids = udcReadBits16(bpt->udc, swapped);
bits16 n;

UBYTE curKey[bpt->keySize];    /* key currently being examined */

if (!leafFlag)
    {
    /* The first key is skipped; its offset is where we go unless a later
     * child turns out to be the right one. */
    udcMustRead(bpt->udc, curKey, bpt->keySize);
    bits64 childOffset = udcReadBits64(bpt->udc, swapped);

    for (n = 1; n < kids; ++n)
        {
        udcMustRead(bpt->udc, curKey, bpt->keySize);
        if (memcmp(key, curKey, bpt->keySize) < 0)
            break;
        childOffset = udcReadBits64(bpt->udc, swapped);
        }
    return rFind(bpt, childOffset, key, val);
    }

/* Leaf: scan the key/value pairs for an exact match. */
for (n = 0; n < kids; ++n)
    {
    udcMustRead(bpt->udc, curKey, bpt->keySize);
    udcMustRead(bpt->udc, val, bpt->valSize);
    if (memcmp(key, curKey, bpt->keySize) == 0)
        return TRUE;
    }
return FALSE;
}
static off_t kuSeek(knetFile *fp, int64_t off, int whence)
/* Seek the udc file behind fp.  SEEK_SET and SEEK_CUR are supported; any other
 * whence (including the SEEK_END that samtools uses to check for an empty record
 * at end of file) is refused without touching the file.  Despite the off_t return
 * type the result is a status: 0 for OK, non-0 for fail. */
{
bits64 target;
switch (whence)
    {
    case SEEK_SET:
        target = off;
        break;
    case SEEK_CUR:
        target = off+ udcTell(fp->udcf);
        break;
    default:
        return -1;
    }
verbose(2, "udcSeek(%lu, %lld)\n", (unsigned long)(fp->udcf), target);
udcSeek(fp->udcf, target);
return 0;
}
/* Read mergedSize bytes starting at firstBlock->offset in one request, then hand
 * each block in [firstBlock, afterBlock) to openBlock together with a pointer to
 * its slice of the buffer.  Returns true as soon as openBlock returns true for a
 * block, false if it never does.  The buffer is released before returning. */
static bool downloadBlockRun(BigFileReaderData * data, char * chrom, struct fileOffsetSize * firstBlock, struct fileOffsetSize * afterBlock, bits64 mergedSize)
{
    bool opened = false;
    char * runBuf;
    char * slice;
    struct fileOffsetSize * cur;

    udcSeek(data->udc, firstBlock->offset);
    runBuf = (char *) needLargeMem(mergedSize);
    slice = runBuf;
    udcMustRead(data->udc, runBuf, mergedSize);

    cur = firstBlock;
    while (cur != afterBlock) {
        if (openBlock(data, cur, slice)) {
            opened = true;
            break;
        }
        slice += cur->size;    /* next block's bytes follow immediately */
        cur = cur->next;
    }

    freeMem(runBuf);
    return opened;
}
static void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context,
    void (*callback)(void *context, void *key, int keySize, void *val, int valSize) )
/* Visit the subtree whose block starts at blockStart, calling callback once for
 * every key/value pair found in a leaf.  The key and value handed to callback are
 * stack buffers that are reused for the next item. */
{
udcSeek(bpt->udc, blockStart);

/* Block header: leaf flag, a reserved byte, then the child count. */
UBYTE leafFlag;
UBYTE pad;
udcMustReadOne(bpt->udc, leafFlag);
udcMustReadOne(bpt->udc, pad);
boolean swapped = bpt->isSwapped;
bits16 kids = udcReadBits16(bpt->udc, swapped);
bits16 n;

char curKey[bpt->keySize], curVal[bpt->valSize];

if (leafFlag)
    {
    for (n = 0; n < kids; ++n)
        {
        udcMustRead(bpt->udc, curKey, bpt->keySize);
        udcMustRead(bpt->udc, curVal, bpt->valSize);
        callback(context, curKey, bpt->keySize, curVal, bpt->valSize);
        }
    return;
    }

/* Gather all child offsets first, since recursing moves the file position. */
bits64 childOffsets[kids];
for (n = 0; n < kids; ++n)
    {
    udcMustRead(bpt->udc, curKey, bpt->keySize);
    childOffsets[n] = udcReadBits64(bpt->udc, swapped);
    }

for (n = 0; n < kids; ++n)
    rTraverse(bpt, childOffsets[n], context, callback);
}
static void rFindMulti(struct bptFile *bpt, bits64 blockStart, void *key, struct slRef **pList)
/* Add to pList a reference to a cloned copy of every value stored under key in
 * the subtree whose block starts at blockStart.  The caller should do a
 * slRefFreeListAndVals() on the list when done. */
{
udcSeek(bpt->udc, blockStart);

/* Block header: leaf flag, a reserved byte, then the child count. */
UBYTE leafFlag;
UBYTE pad;
udcMustReadOne(bpt->udc, leafFlag);
udcMustReadOne(bpt->udc, pad);
boolean swapped = bpt->isSwapped;
bits16 kids = udcReadBits16(bpt->udc, swapped);
bits16 n;

int keyLen = bpt->keySize;
UBYTE curKey[keyLen];           /* key currently being examined */
UBYTE curVal[bpt->valSize];     /* value currently being examined */

if (leafFlag)
    {
    for (n = 0; n < kids; ++n)
        {
        udcMustRead(bpt->udc, curKey, keyLen);
        udcMustRead(bpt->udc, curVal, bpt->valSize);
        if (memcmp(key, curKey, keyLen) != 0)
            continue;
        refAdd(pList, cloneMem(curVal, bpt->valSize));
        }
    return;
    }

/* Non-leaf.  Start with the first key and the child offset stored with it. */
udcMustRead(bpt->udc, curKey, keyLen);
bits64 prevChild = udcReadBits64(bpt->udc, swapped);
bits64 child = prevChild;
int prevCmp = memcmp(key, curKey, keyLen);

for (n = 1; n < kids; ++n)
    {
    udcMustRead(bpt->udc, curKey, keyLen);
    child = udcReadBits64(bpt->udc, swapped);
    int cmp = memcmp(key, curKey, keyLen);

    /* key is not below the previous child's key and not above this one's, so
     * the previous child may hold matches. */
    if (prevCmp >= 0 && cmp <= 0)
        {
        bits64 resumeAt = udcTell(bpt->udc);
        rFindMulti(bpt, prevChild, key, pList);
        udcSeek(bpt->udc, resumeAt);    /* recursion moved the file position */
        }

    /* key sorts before this child's key, so the remaining children are skipped. */
    if (cmp < 0)
        return;

    prevCmp = cmp;
    prevChild = child;
    }

/* Got through every key without stopping, so search the last child as well. */
rFindMulti(bpt, child, key, pList);
}
struct bbiSummaryElement bbiTotalSummary(struct bbiFile *bbi) /* Return summary of entire file! */ { struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; struct bbiSummaryElement res; ZeroVar(&res); if (bbi->totalSummaryOffset != 0) { udcSeek(udc, bbi->totalSummaryOffset); res.validCount = udcReadBits64(udc, isSwapped); res.minVal = udcReadDouble(udc, isSwapped); res.maxVal = udcReadDouble(udc, isSwapped); res.sumData = udcReadDouble(udc, isSwapped); res.sumSquares = udcReadDouble(udc, isSwapped); } else if (bbi->version == 1) /* Require version 1 so as not to have to deal with compression. Should not happen * to have NULL totalSummaryOffset for non-empty version 2+ file anyway. */ { /* Find most extreme zoom. */ struct bbiZoomLevel *bestZoom = NULL, *zoom; bits32 bestReduction = 0; for (zoom = bbi->levelList; zoom != NULL; zoom = zoom->next) { if (zoom->reductionLevel > bestReduction) { bestReduction = zoom->reductionLevel; bestZoom = zoom; } } if (bestZoom != NULL) { udcSeek(udc, bestZoom->dataOffset); bits32 zoomSectionCount = udcReadBits32(udc, isSwapped); bits32 i; for (i=0; i<zoomSectionCount; ++i) { /* Read, but ignore, position. */ bits32 chromId, chromStart, chromEnd; chromId = udcReadBits32(udc, isSwapped); chromStart = udcReadBits32(udc, isSwapped); chromEnd = udcReadBits32(udc, isSwapped); /* First time through set values, rest of time add to them. */ if (i == 0) { res.validCount = udcReadBits32(udc, isSwapped); res.minVal = udcReadFloat(udc, isSwapped); res.maxVal = udcReadFloat(udc, isSwapped); res.sumData = udcReadFloat(udc, isSwapped); res.sumSquares = udcReadFloat(udc, isSwapped); } else { res.validCount += udcReadBits32(udc, isSwapped); float minVal = udcReadFloat(udc, isSwapped); if (minVal < res.minVal) res.minVal = minVal; float maxVal = udcReadFloat(udc, isSwapped); if (maxVal > res.maxVal) res.maxVal = maxVal; res.sumData += udcReadFloat(udc, isSwapped); res.sumSquares += udcReadFloat(udc, isSwapped); } } } } return res; }
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
	struct lm *lm)
/* Get data for interval.  Return list allocated out of lm.
 * Every returned interval is clipped to start-end, and items that are empty
 * after clipping are left out.  Aborts if bwf is not a bigWig file, if a block
 * cannot be read in full, or if a section has an unknown type. */
{
if (bwf->typeSig != bigWigSig)
    errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct bbiInterval *el, *list = NULL;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
	chrom, start, end, NULL);
struct fileOffsetSize *block;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;

/* Merge the block list so that each merged entry can be fetched with one read. */
struct fileOffsetSize *mergedBlocks = fileOffsetSizeMerge(blockList);
for (block = mergedBlocks; block != NULL; block = block->next)
    {
    udcSeek(udc, block->offset);
    char *blockBuf = needLargeMem(block->size);
    /* Insist on the whole block; after a short read the loop below would parse
     * bytes that were never read. */
    udcMustRead(udc, blockBuf, block->size);
    char *blockPt = blockBuf, *blockEnd = blockBuf + block->size;

    /* A merged block may hold several sections back to back. */
    while (blockPt < blockEnd)
        {
        struct bwgSectionHead head;
        bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
        switch (head.type)
            {
            case bwgTypeBedGraph:
                {
                for (i=0; i<head.itemCount; ++i)
                    {
                    bits32 s = memReadBits32(&blockPt, isSwapped);
                    bits32 e = memReadBits32(&blockPt, isSwapped);
                    val = memReadFloat(&blockPt, isSwapped);
                    if (s < start) s = start;
                    if (e > end) e = end;
                    if (s < e)
                        {
                        lmAllocVar(lm, el);
                        el->start = s;
                        el->end = e;
                        el->val = val;
                        slAddHead(&list, el);
                        }
                    }
                break;
                }
            case bwgTypeVariableStep:
                {
                for (i=0; i<head.itemCount; ++i)
                    {
                    bits32 s = memReadBits32(&blockPt, isSwapped);
                    bits32 e = s + head.itemSpan;
                    val = memReadFloat(&blockPt, isSwapped);
                    if (s < start) s = start;
                    if (e > end) e = end;
                    if (s < e)
                        {
                        lmAllocVar(lm, el);
                        el->start = s;
                        el->end = e;
                        el->val = val;
                        slAddHead(&list, el);
                        }
                    }
                break;
                }
            case bwgTypeFixedStep:
                {
                bits32 s = head.start;
                bits32 e = s + head.itemSpan;
                for (i=0; i<head.itemCount; ++i)
                    {
                    val = memReadFloat(&blockPt, isSwapped);
                    /* Clip copies so that s and e keep stepping unclipped. */
                    bits32 clippedS = s, clippedE = e;
                    if (clippedS < start) clippedS = start;
                    if (clippedE > end) clippedE = end;
                    if (clippedS < clippedE)
                        {
                        lmAllocVar(lm, el);
                        el->start = clippedS;
                        el->end = clippedE;
                        el->val = val;
                        slAddHead(&list, el);
                        }
                    s += head.itemStep;
                    e += head.itemStep;
                    }
                break;
                }
            default:
                internalErr();
                break;
            }
        }

    /* The intervals were allocated from lm, so the raw block can go now.
     * Without this every merged block read would be leaked. */
    freeMem(blockBuf);
    }
slFreeList(&mergedBlocks);
slFreeList(&blockList);
slReverse(&list);    /* items were added at the head */
return list;
}
bits64 bigBedItemCount(struct bbiFile *bbi)
/* Return total items in file, which is the 64 bit value stored at the header's
 * unzoomedDataOffset. */
{
struct udcFile *f = bbi->udc;
udcSeek(f, bbi->unzoomedDataOffset);
bits64 itemCount = udcReadBits64(f, bbi->isSwapped);
return itemCount;
}
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
	struct lm *lm)
/* Collect the bigWig values that overlap chrom:start-end as a list of intervals
 * allocated out of lm.  Every interval is clipped to the query range and items
 * that are empty after clipping are left out.  Aborts if bwf is not a bigWig
 * file. */
{
if (bwf->typeSig != bigWigSig)
    errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);

struct bbiInterval *item, *results = NULL;
struct fileOffsetSize *blocks = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
	chrom, start, end, NULL);
struct fileOffsetSize *cur, *runLast, *runStop;
struct udcFile *f = bwf->udc;
boolean swapped = bwf->isSwapped;
float value;
int n;

/* A scratch buffer is needed only when the file gives an uncompress size. */
char *inflated = NULL;
if (bwf->uncompressBufSize > 0)
    inflated = needLargeMem(bwf->uncompressBufSize);

/* Adjacent blocks are fetched with a single read, then handled one by one.
 * The inner loop is what advances cur. */
cur = blocks;
while (cur != NULL)
    {
    /* Work out the extent of the run of contiguous blocks and read it. */
    fileOffsetSizeFindGap(cur, &runLast, &runStop);
    bits64 runOffset = cur->offset;
    bits64 runSize = runLast->offset + runLast->size - runOffset;
    udcSeek(f, runOffset);
    char *runBuf = needLargeMem(runSize);
    udcMustRead(f, runBuf, runSize);
    char *raw = runBuf;

    for ( ; cur != runStop; cur = cur->next)
        {
        /* Point pt/ptEnd at this block's bytes, uncompressing if need be. */
        char *pt, *ptEnd;
        if (inflated == NULL)
            {
            pt = raw;
            ptEnd = pt + cur->size;
            }
        else
            {
            pt = inflated;
            int inflatedSize = zUncompress(raw, cur->size, inflated, bwf->uncompressBufSize);
            ptEnd = pt + inflatedSize;
            }

        /* One section per block: a header followed by its items. */
        struct bwgSectionHead head;
        bwgSectionHeadFromMem(&pt, &head, swapped);
        switch (head.type)
            {
            case bwgTypeBedGraph:
                {
                for (n = 0; n < head.itemCount; ++n)
                    {
                    bits32 lo = memReadBits32(&pt, swapped);
                    bits32 hi = memReadBits32(&pt, swapped);
                    value = memReadFloat(&pt, swapped);
                    if (lo < start)
                        lo = start;
                    if (hi > end)
                        hi = end;
                    if (lo >= hi)
                        continue;
                    lmAllocVar(lm, item);
                    item->start = lo;
                    item->end = hi;
                    item->val = value;
                    slAddHead(&results, item);
                    }
                break;
                }
            case bwgTypeVariableStep:
                {
                for (n = 0; n < head.itemCount; ++n)
                    {
                    bits32 lo = memReadBits32(&pt, swapped);
                    bits32 hi = lo + head.itemSpan;
                    value = memReadFloat(&pt, swapped);
                    if (lo < start)
                        lo = start;
                    if (hi > end)
                        hi = end;
                    if (lo >= hi)
                        continue;
                    lmAllocVar(lm, item);
                    item->start = lo;
                    item->end = hi;
                    item->val = value;
                    slAddHead(&results, item);
                    }
                break;
                }
            case bwgTypeFixedStep:
                {
                bits32 itemStart = head.start;
                bits32 itemEnd = itemStart + head.itemSpan;
                for (n = 0; n < head.itemCount; ++n)
                    {
                    value = memReadFloat(&pt, swapped);
                    /* Clip copies so the stepping positions stay unclipped. */
                    bits32 lo = (itemStart < start) ? start : itemStart;
                    bits32 hi = (itemEnd > end) ? end : itemEnd;
                    if (lo < hi)
                        {
                        lmAllocVar(lm, item);
                        item->start = lo;
                        item->end = hi;
                        item->val = value;
                        slAddHead(&results, item);
                        }
                    itemStart += head.itemStep;
                    itemEnd += head.itemStep;
                    }
                break;
                }
            default:
                internalErr();
                break;
            }
        assert(pt == ptEnd);
        raw += cur->size;
        }
    freeMem(runBuf);
    }
freeMem(inflated);
slFreeList(&blocks);
slReverse(&results);    /* items were added at the head */
return results;
}
int bigWigIntervalDump(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, int maxCount,
	FILE *out)
/* Print out info on bigWig parts that intersect chrom:start-end.  Set maxCount to 0 if you
 * don't care how many are printed.  Returns number printed.
 * When maxCount is non-zero, dumping stops for good once a block has used up the
 * remaining allowance.  Aborts if bwf is not a bigWig file. */
{
if (bwf->typeSig != bigWigSig)
    errAbort("Trying to do bigWigIntervalDump on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
	chrom, start, end, NULL);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
int printCount = 0;

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bwf->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bwf->uncompressBufSize);

/* This loop is a little complicated because we merge the read requests for efficiency, but we
 * have to then go back through the data one unmerged block at a time.
 * The outer loop has no increment of its own: the inner loop advances block.  That
 * is why reaching the limit has to be signalled through 'done'.  A bare break
 * leaves block where it was and the outer loop would re-read and re-dump the
 * same run without end. */
int done = 0;
for (block = blockList; block != NULL && !done; )
    {
    /* Find contiguous blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
        {
        /* Uncompress if necessary. */
        char *blockPt, *blockEnd;
        if (uncompressBuf)
            {
            blockPt = uncompressBuf;
            int uncSize = zUncompress(blockBuf, block->size, uncompressBuf,
                                      bwf->uncompressBufSize);
            blockEnd = blockPt + uncSize;
            }
        else
            {
            blockPt = blockBuf;
            blockEnd = blockPt + block->size;
            }

        /* Do the actual dump. */
        int oneCount = bigWigBlockDumpIntersectingRange(bwf->isSwapped, blockPt, blockEnd,
                chrom, start, end, maxCount, out);

        /* Keep track of how many dumped, not exceeding maximum. */
        printCount += oneCount;
        if (maxCount != 0)
            {
            if (oneCount >= maxCount)
                {
                done = 1;
                break;
                }
            maxCount -= oneCount;
            }
        blockBuf += block->size;
        }
    freeMem(mergedBuf);
    }
freeMem(uncompressBuf);

slFreeList(&blockList);
return printCount;
}
static void fetchIntoBuf(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
	struct bigWigValsOnChrom *chromVals)
/* Load the bigWig values overlapping chrom:start-end into chromVals: for every
 * base covered by an item the matching bit is set in chromVals->covBuf and the
 * item's value is stored in chromVals->valBuf at that base's index.  Items are
 * not clipped to start-end.  Aborts if bwf is not a bigWig file.
 *
 * Much of this mirrors bigWigIntervalQuery.  Both are speed critical inner loops,
 * which is why the shared parts have not been factored out. */
{
if (bwf->typeSig != bigWigSig)
    errAbort("Trying to do fetchIntoBuf on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);

struct fileOffsetSize *blocks = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
	chrom, start, end, NULL);
struct fileOffsetSize *cur, *runLast, *runStop;
struct udcFile *f = bwf->udc;
boolean swapped = bwf->isSwapped;
float value;
int n;
Bits *covered = chromVals->covBuf;
double *values = chromVals->valBuf;

/* A scratch buffer is needed only when the file gives an uncompress size. */
char *inflated = NULL;
if (bwf->uncompressBufSize > 0)
    inflated = needLargeMem(bwf->uncompressBufSize);

/* Adjacent blocks are fetched with a single read, then handled one by one.
 * The inner loop is what advances cur. */
cur = blocks;
while (cur != NULL)
    {
    /* Work out the extent of the run of contiguous blocks and read it. */
    fileOffsetSizeFindGap(cur, &runLast, &runStop);
    bits64 runOffset = cur->offset;
    bits64 runSize = runLast->offset + runLast->size - runOffset;
    udcSeek(f, runOffset);
    char *runBuf = needLargeMem(runSize);
    udcMustRead(f, runBuf, runSize);
    char *raw = runBuf;

    for ( ; cur != runStop; cur = cur->next)
        {
        /* Point pt/ptEnd at this block's bytes, uncompressing if need be. */
        char *pt, *ptEnd;
        if (inflated == NULL)
            {
            pt = raw;
            ptEnd = pt + cur->size;
            }
        else
            {
            pt = inflated;
            int inflatedSize = zUncompress(raw, cur->size, inflated, bwf->uncompressBufSize);
            ptEnd = pt + inflatedSize;
            }

        /* One section per block: a header followed by its items. */
        struct bwgSectionHead head;
        bwgSectionHeadFromMem(&pt, &head, swapped);
        switch (head.type)
            {
            case bwgTypeBedGraph:
                {
                for (n = 0; n < head.itemCount; ++n)
                    {
                    bits32 from = memReadBits32(&pt, swapped);
                    bits32 to = memReadBits32(&pt, swapped);
                    bitSetRange(covered, from, to-from);
                    value = memReadFloat(&pt, swapped);
                    bits32 pos;
                    for (pos = from; pos < to; ++pos)
                        values[pos] = value;
                    }
                break;
                }
            case bwgTypeVariableStep:
                {
                for (n = 0; n < head.itemCount; ++n)
                    {
                    bits32 from = memReadBits32(&pt, swapped);
                    value = memReadFloat(&pt, swapped);
                    bitSetRange(covered, from, head.itemSpan);
                    bits32 to = from + head.itemSpan;
                    bits32 pos;
                    for (pos = from; pos < to; ++pos)
                        values[pos] = value;
                    }
                break;
                }
            case bwgTypeFixedStep:
                {
                if (head.itemStep == 1 && head.itemSpan == 1)
                    {
                    /* Step 1, span 1 is the most common and also the worst
                     * case: one float per base, copied straight across. */
                    bits32 from = head.start;
                    bits32 to = head.end;
                    bitSetRange(covered, from, to-from);
                    bits32 pos;
                    for (pos = from; pos < to; ++pos)
                        values[pos] = memReadFloat(&pt, swapped);
                    }
                else
                    {
                    bits32 from = head.start;
                    bits32 to = from + head.itemSpan;
                    for (n = 0; n < head.itemCount; ++n)
                        {
                        bitSetRange(covered, from, head.itemSpan);
                        value = memReadFloat(&pt, swapped);
                        bits32 pos;
                        for (pos = from; pos < to; ++pos)
                            values[pos] = value;
                        from += head.itemStep;
                        to += head.itemStep;
                        }
                    }
                break;
                }
            default:
                internalErr();
                break;
            }
        assert(pt == ptEnd);
        raw += cur->size;
        }
    freeMem(runBuf);
    }
freeMem(inflated);
slFreeList(&blocks);
}
static struct bigBedInterval *bigBedIntervalsMatchingName(struct bbiFile *bbi,
    struct fileOffsetSize *fosList, BbFirstWordMatch matcher, int fieldIx, void *target,
    struct lm *lm)
/* Return list of intervals inside of sectors of bbiFile defined by fosList where the name
 * matches target somehow.  Each sector is read (and uncompressed when the file has an
 * uncompress buffer size), its records are walked, and matcher is called on the text
 * that follows chrom/start/end with fieldIx and target.  Matching records are returned
 * in the order they were met.  The interval structs come out of lm.
 * NOTE(review): interval->rest is made with cloneString rather than taken from lm --
 * confirm who is expected to free it. */
{
struct bigBedInterval *interval, *intervalList = NULL;
struct fileOffsetSize *fos;
boolean isSwapped = bbi->isSwapped;
for (fos = fosList; fos != NULL; fos = fos->next)
    {
    /* Read in raw data.  Insist on the whole sector; after a short read the
     * parsing below would run over bytes that were never read. */
    udcSeek(bbi->udc, fos->offset);
    char *rawData = needLargeMem(fos->size);
    udcMustRead(bbi->udc, rawData, fos->size);

    /* Optionally uncompress data, and set data pointer to uncompressed version. */
    char *uncompressedData = NULL;
    char *data = NULL;
    int dataSize = 0;
    if (bbi->uncompressBufSize > 0)
        {
        data = uncompressedData = needLargeMem(bbi->uncompressBufSize);
        dataSize = zUncompress(rawData, fos->size, uncompressedData, bbi->uncompressBufSize);
        }
    else
        {
        data = rawData;
        dataSize = fos->size;
        }

    /* Set up for "memRead" routines to more or less treat memory block like file */
    char *blockPt = data, *blockEnd = data + dataSize;
    struct dyString *dy = dyStringNew(32); /* Keep bits outside of chrom/start/end here */

    /* Read next record into local variables. */
    while (blockPt < blockEnd)
        {
        bits32 chromIx = memReadBits32(&blockPt, isSwapped);
        bits32 s = memReadBits32(&blockPt, isSwapped);
        bits32 e = memReadBits32(&blockPt, isSwapped);

        /* Copy the zero terminated rest-of-line.  Only a zero byte ends the
         * string: where char is signed, bytes of 0x80 and up read as negative
         * and must not be taken for the terminator, or the following record
         * would be parsed from the middle of this one.  Also never run past
         * the end of the block. */
        int c;
        dyStringClear(dy);
        while (blockPt < blockEnd && (c = *blockPt++) != 0)
            dyStringAppendC(dy, c);

        if ((*matcher)(dy->string, fieldIx, target))
            {
            lmAllocVar(lm, interval);
            interval->start = s;
            interval->end = e;
            interval->rest = cloneString(dy->string);
            interval->chromId = chromIx;
            slAddHead(&intervalList, interval);
            }
        }

    /* Clean up temporary buffers. */
    dyStringFree(&dy);
    freez(&uncompressedData);
    freez(&rawData);
    }
slReverse(&intervalList);
return intervalList;
}
struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName)
/* Open a bigWig or bigBed file.  The signature is compared with sig in both byte
 * orders, and the file is rejected with an error naming typeName if neither
 * matches.  Then the fixed header, the zoom level headers and the chromosome
 * B+ tree are loaded.
 *
 * The order of the header fields read here has to agree with what
 * bigBedFileCreate and bigWigFileCreate write. */
{
struct bbiFile *bbi;
AllocVar(bbi);
bbi->fileName = cloneString(fileName);
struct udcFile *f = udcFileOpen(fileName, udcDefaultDir());
bbi->udc = f;

/* The signature tells us both the file type and whether to byte-swap. */
bits32 fileSig;
boolean swapped = FALSE;
udcMustRead(f, &fileSig, sizeof(fileSig));
if (fileSig != sig)
    {
    fileSig = byteSwap32(fileSig);
    swapped = TRUE;
    if (fileSig != sig)
        errAbort("%s is not a %s file", fileName, typeName);
    }
bbi->typeSig = sig;
bbi->isSwapped = swapped;

/* The defined part of the header, in file order. */
bbi->version = udcReadBits16(f, swapped);
bbi->zoomLevels = udcReadBits16(f, swapped);
bbi->chromTreeOffset = udcReadBits64(f, swapped);
bbi->unzoomedDataOffset = udcReadBits64(f, swapped);
bbi->unzoomedIndexOffset = udcReadBits64(f, swapped);
bbi->fieldCount = udcReadBits16(f, swapped);
bbi->definedFieldCount = udcReadBits16(f, swapped);
bbi->asOffset = udcReadBits64(f, swapped);
bbi->totalSummaryOffset = udcReadBits64(f, swapped);
bbi->uncompressBufSize = udcReadBits32(f, swapped);

/* Jump over the reserved area; the zoom headers begin at byte 64. */
udcSeek(f, 64);

/* One header per zoom level, kept in file order. */
struct bbiZoomLevel *zoom, *zoomList = NULL;
int zoomIx;
for (zoomIx = 0; zoomIx < bbi->zoomLevels; ++zoomIx)
    {
    AllocVar(zoom);
    zoom->reductionLevel = udcReadBits32(f, swapped);
    zoom->reserved = udcReadBits32(f, swapped);
    zoom->dataOffset = udcReadBits64(f, swapped);
    zoom->indexOffset = udcReadBits64(f, swapped);
    slAddHead(&zoomList, zoom);
    }
slReverse(&zoomList);
bbi->levelList = zoomList;

/* Finally the B+ tree of chromosome names and ids. */
udcSeek(f, bbi->chromTreeOffset);
bbi->chromBpt = bptFileAttach(fileName, f);

return bbi;
}
static void rFindOverlappingBlocks(struct cirTreeFile *crt, int level, bits64 indexFileOffset,
	bits32 chromIx, bits32 start, bits32 end, struct fileOffsetSize **retList)
/* Search the index node at indexFileOffset, and the nodes below it, for entries
 * that overlap chromIx:start-end.  For every overlapping leaf entry a
 * fileOffsetSize with its offset and size is added at the head of *retList. */
{
struct udcFile *f = crt->udc;

udcSeek(f, indexFileOffset);

/* Node header: leaf flag, a reserved byte, then the child count. */
UBYTE leafFlag;
UBYTE pad;
udcMustReadOne(f, leafFlag);
udcMustReadOne(f, pad);
boolean swapped = crt->isSwapped;
bits16 kids = udcReadBits16(f, swapped);
bits16 n;

verbose(3, "rFindOverlappingBlocks %llu %u:%u-%u.  childCount %d. isLeaf %d\n", indexFileOffset, chromIx, start, end, (int)kids, (int)leafFlag);

if (!leafFlag)
    {
    /* Read the whole node before recursing, since recursion moves the file
     * position. */
    struct
        {
        bits32 startChromIx, startBase;
        bits32 endChromIx, endBase;
        bits64 offset;
        } kid[kids];
    for (n = 0; n < kids; ++n)
        {
        kid[n].startChromIx = udcReadBits32(f, swapped);
        kid[n].startBase = udcReadBits32(f, swapped);
        kid[n].endChromIx = udcReadBits32(f, swapped);
        kid[n].endBase = udcReadBits32(f, swapped);
        kid[n].offset = udcReadBits64(f, swapped);
        }

    /* Descend into each child whose range overlaps the query. */
    for (n = 0; n < kids; ++n)
        {
        if (!cirTreeOverlaps(chromIx, start, end, kid[n].startChromIx, kid[n].startBase,
                             kid[n].endChromIx, kid[n].endBase))
            continue;
        rFindOverlappingBlocks(crt, level+1, kid[n].offset, chromIx, start, end, retList);
        }
    return;
    }

/* Leaf: each entry is a range followed by the offset and size of a data block. */
for (n = 0; n < kids; ++n)
    {
    bits32 startChromIx = udcReadBits32(f, swapped);
    bits32 startBase = udcReadBits32(f, swapped);
    bits32 endChromIx = udcReadBits32(f, swapped);
    bits32 endBase = udcReadBits32(f, swapped);
    bits64 dataOffset = udcReadBits64(f, swapped);
    bits64 dataSize = udcReadBits64(f, swapped);
    if (!cirTreeOverlaps(chromIx, start, end, startChromIx, startBase, endChromIx, endBase))
        continue;
    struct fileOffsetSize *hit;
    AllocVar(hit);
    hit->offset = dataOffset;
    hit->size = dataSize;
    slAddHead(retList, hit);
    }
}
static struct bbiSummary *bbiSummariesInRegion(struct bbiZoomLevel *zoom, struct bbiFile *bbi,
	int chromId, bits32 start, bits32 end)
/* Gather the summaries stored at the given zoom level of bbi that are on chromId
 * and intersect start-end.  The returned list is in the order the summaries were
 * encountered while scanning the blocks. */
{
struct bbiSummary *found = NULL, *one;
struct udcFile *in = bbi->udc;

/* Attach the index of this zoom level and ask it which data blocks to scan. */
udcSeek(in, zoom->indexOffset);
struct cirTreeFile *index = cirTreeFileAttach(bbi->fileName, bbi->udc);
struct fileOffsetSize *blocks = cirTreeFindOverlappingBlocks(index, chromId, start, end);

/* A scratch buffer for decompression is allocated only when
 * bbi->uncompressBufSize is nonzero; its absence means blocks are used as read. */
char *unzipBuf = NULL;
if (bbi->uncompressBufSize > 0)
    unzipBuf = needLargeMem(bbi->uncompressBufSize);

/* Adjacent blocks are fetched with a single read (a "run"), then each block
 * inside the run is processed on its own. */
struct fileOffsetSize *cur = blocks;
while (cur != NULL)
    {
    struct fileOffsetSize *runLast, *runStop;
    fileOffsetSizeFindGap(cur, &runLast, &runStop);
    bits64 runOffset = cur->offset;
    bits64 runSize = runLast->offset + runLast->size - runOffset;
    udcSeek(in, runOffset);
    char *runBuf = needLargeMem(runSize);
    udcMustRead(in, runBuf, runSize);

    /* raw tracks where the current block begins inside runBuf. */
    char *raw = runBuf;
    for ( ; cur != runStop; raw += cur->size, cur = cur->next)
	{
	/* Point pt/ptEnd at the usable bytes of this block. */
	char *pt, *ptEnd;
	if (unzipBuf == NULL)
	    {
	    pt = raw;
	    ptEnd = pt + cur->size;
	    }
	else
	    {
	    int unzipped = zUncompress(raw, cur->size, unzipBuf, bbi->uncompressBufSize);
	    pt = unzipBuf;
	    ptEnd = pt + unzipped;
	    }

	/* The block must hold a whole number of on-disk summary records. */
	struct bbiSummaryOnDisk *rec;
	int recSize = sizeof(*rec);
	int byteCount = ptEnd - pt;
	assert(byteCount % recSize == 0);
	int left = byteCount / recSize;

	/* Convert each record on the right chromosome that intersects the range. */
	for ( ; left > 0; --left, pt += sizeof(*rec))
	    {
	    rec = (void *)pt;
	    if (rec->chromId != chromId)
		continue;
	    int s = max(rec->start, start);
	    int e = min(rec->end, end);
	    if (s < e)
		{
		one = bbiSummaryFromOnDisk(rec);
		slAddHead(&found, one);
		}
	    }
	assert(pt == ptEnd);
	}
    freeMem(runBuf);
    }

freeMem(unzipBuf);
slFreeList(&blocks);
cirTreeFileDetach(&index);

/* Items were pushed on the head, so flip to get encounter order. */
slReverse(&found);
return found;
}
struct bbiFile *bbiFileOpenWithDir(char *fileName, bits32 sig, char *typeName, char *udcDir)
/* Mostly the same as bbiFileOpen in bbiFile.c, except that udcDir is handed to
 * udcFileOpen so the caller picks the directory udc works in.  Reads the fixed
 * header, the zoom level headers and the optional header extension, then
 * attaches the chromosome B+ tree.  Aborts with an error if the signature in the
 * file matches sig in neither byte order. */
{
struct bbiFile *bbi;
AllocVar(bbi);
bbi->fileName = cloneString(fileName);
bbi->udc = udcFileOpen(fileName, udcDir);
struct udcFile *in = bbi->udc;

/* The leading 32 bit word is the signature.  If it only matches after a byte
 * swap, every later multi-byte read is made with swapping on. */
bits32 magic;
udcMustRead(in, &magic, sizeof(magic));
boolean swap = FALSE;
if (magic != sig)
    {
    swap = TRUE;
    magic = byteSwap32(magic);
    if (magic != sig)
	errAbort("%s is not a %s file", fileName, typeName);
    }
bbi->typeSig = sig;
bbi->isSwapped = swap;

/* Remaining fixed header fields, in file order. */
bbi->version = udcReadBits16(in, swap);
bbi->zoomLevels = udcReadBits16(in, swap);
bbi->chromTreeOffset = udcReadBits64(in, swap);
bbi->unzoomedDataOffset = udcReadBits64(in, swap);
bbi->unzoomedIndexOffset = udcReadBits64(in, swap);
bbi->fieldCount = udcReadBits16(in, swap);
bbi->definedFieldCount = udcReadBits16(in, swap);
bbi->asOffset = udcReadBits64(in, swap);
bbi->totalSummaryOffset = udcReadBits64(in, swap);
bbi->uncompressBufSize = udcReadBits32(in, swap);
bbi->extensionOffset = udcReadBits64(in, swap);

/* One header per zoom level follows; keep them in file order. */
struct bbiZoomLevel *zoomList = NULL;
int z;
for (z = 0; z < bbi->zoomLevels; ++z)
    {
    struct bbiZoomLevel *zoom;
    AllocVar(zoom);
    zoom->reductionLevel = udcReadBits32(in, swap);
    zoom->reserved = udcReadBits32(in, swap);
    zoom->dataOffset = udcReadBits64(in, swap);
    zoom->indexOffset = udcReadBits64(in, swap);
    slAddHead(&zoomList, zoom);
    }
slReverse(&zoomList);
bbi->levelList = zoomList;

/* A nonzero extensionOffset points at an extra header describing extra indexes. */
if (bbi->extensionOffset != 0)
    {
    udcSeek(in, bbi->extensionOffset);
    bbi->extensionSize = udcReadBits16(in, swap);
    bbi->extraIndexCount = udcReadBits16(in, swap);
    bbi->extraIndexListOffset = udcReadBits64(in, swap);
    }

/* Finally attach the B+ tree of chromosome names and ids. */
udcSeek(in, bbi->chromTreeOffset);
bbi->chromBpt = bptFileAttach(fileName, in);

return bbi;
}
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom,
	bits32 start, bits32 end, int maxItems, struct lm *lm)
/* Get data for interval.  Return list allocated out of lm.  Set maxItems to maximum
 * number of items to return, or to 0 for all items.
 *
 * Only records whose chromosome id matches chrom and that satisfy
 * s < end && e > start are returned.  The text following the three fixed fields
 * of a record is copied into el->rest when it is non-empty.  Scanning stops as
 * soon as more than maxItems matching records have been seen (when maxItems is
 * positive), so at most maxItems elements are returned. */
{
struct bigBedInterval *el, *list = NULL;
int itemCount = 0;
bbiAttachUnzoomedCir(bbi);
bits32 chromId;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir,
	chrom, start, end, &chromId);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct dyString *dy = dyStringNew(32);

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bbi->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bbi->uncompressBufSize);

for (block = blockList; block != NULL; )
    {
    /* Find contiguous blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
	{
	/* Uncompress if necessary. */
	char *blockPt, *blockEnd;
	if (uncompressBuf)
	    {
	    blockPt = uncompressBuf;
	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf,
		    bbi->uncompressBufSize);
	    blockEnd = blockPt + uncSize;
	    }
	else
	    {
	    blockPt = blockBuf;
	    blockEnd = blockPt + block->size;
	    }

	while (blockPt < blockEnd)
	    {
	    /* Read the three fixed fields of the next record. */
	    bits32 chr = memReadBits32(&blockPt, isSwapped);
	    bits32 s = memReadBits32(&blockPt, isSwapped);
	    bits32 e = memReadBits32(&blockPt, isSwapped);

	    /* Copy the zero-terminated remainder of the record.  Only a zero
	     * byte ends the string: testing the sign of the character would stop
	     * early on bytes >= 0x80 where plain char is signed, truncating the
	     * field and leaving blockPt in the middle of the record.  The
	     * blockEnd bound keeps a block lacking the terminator from being
	     * read past its end. */
	    dyStringClear(dy);
	    while (blockPt < blockEnd)
		{
		char c = *blockPt++;
		if (c == 0)
		    break;
		dyStringAppendC(dy, c);
		}

	    /* If we're actually in range then copy it into a new element and add to list. */
	    if (chr == chromId && s < end && e > start)
		{
		++itemCount;
		if (maxItems > 0 && itemCount > maxItems)
		    break;

		lmAllocVar(lm, el);
		el->start = s;
		el->end = e;
		if (dy->stringSize > 0)
		    el->rest = lmCloneString(lm, dy->string);
		el->chromId = chromId;
		slAddHead(&list, el);
		}
	    }
	if (maxItems > 0 && itemCount > maxItems)
	    break;
	blockBuf += block->size;
	}

    /* Release the merged read buffer before testing the item limit, so that
     * leaving the loop early does not leak it.  Nothing in list points into
     * mergedBuf; el->rest is a copy made in lm. */
    freez(&mergedBuf);
    if (maxItems > 0 && itemCount > maxItems)
	break;
    }
freeMem(uncompressBuf);
dyStringFree(&dy);
slFreeList(&blockList);
slReverse(&list);
return list;
}