static bits64 bptDataStart(struct bptFile *bpt) /* Return offset of first bit of data (as opposed to index) in file. In hind sight I wish * this were stored in the header, but fortunately it's not that hard to compute. */ { bits64 offset = bpt->rootOffset; for (;;) { /* Seek to block start */ udcSeek(bpt->udc, offset); /* Read block header, break if we are leaf. */ UBYTE isLeaf; UBYTE reserved; bits16 childCount; udcMustReadOne(bpt->udc, isLeaf); if (isLeaf) break; udcMustReadOne(bpt->udc, reserved); boolean isSwapped = bpt->isSwapped; childCount = udcReadBits16(bpt->udc, isSwapped); /* Read and discard first key. */ char keyBuf[bpt->keySize]; udcMustRead(bpt->udc, keyBuf, bpt->keySize); /* Get file offset of sub-block. */ offset = udcReadBits64(bpt->udc, isSwapped); } return offset; }
static void bbiSummaryReverseBytes(void *pt, int size)
/* Reverse the order of the size bytes starting at pt, in place. */
{
unsigned char *bytes = pt;
int lo = 0, hi = size - 1;
while (lo < hi)
    {
    unsigned char tmp = bytes[lo];
    bytes[lo++] = bytes[hi];
    bytes[hi--] = tmp;
    }
}

static void bbiSummaryOnDiskRead(struct bbiFile *bbi, struct bbiSummaryOnDisk *sum)
/* Read one summary record from bbi->udc at its current position into *sum.
 * The four integer fields are read with byte swapping as indicated by
 * bbi->isSwapped.  The four remaining fields (minVal, maxVal, sumData,
 * sumSquares) are read as raw bytes and then byte-reversed when the file is
 * swapped, so that they get the same endian treatment as the integers
 * instead of being left in the writer's byte order. */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;

sum->chromId = udcReadBits32(udc, isSwapped);
sum->start = udcReadBits32(udc, isSwapped);
sum->end = udcReadBits32(udc, isSwapped);
sum->validCount = udcReadBits32(udc, isSwapped);

udcMustReadOne(udc, sum->minVal);
udcMustReadOne(udc, sum->maxVal);
udcMustReadOne(udc, sum->sumData);
udcMustReadOne(udc, sum->sumSquares);

if (isSwapped)
    {
    /* Sizes come from the fields themselves so no particular width is assumed. */
    bbiSummaryReverseBytes(&sum->minVal, sizeof(sum->minVal));
    bbiSummaryReverseBytes(&sum->maxVal, sizeof(sum->maxVal));
    bbiSummaryReverseBytes(&sum->sumData, sizeof(sum->sumData));
    bbiSummaryReverseBytes(&sum->sumSquares, sizeof(sum->sumSquares));
    }
}
static boolean rFind(struct bptFile *bpt, bits64 blockStart, void *key, void *val)
/* Look up key in the tree, beginning at the block at blockStart.  Returns TRUE
 * with the matching value in *val when an exactly matching key is found in a
 * leaf, FALSE otherwise.  Note that val doubles as the read buffer for every
 * leaf entry examined, so its contents are overwritten even when the result
 * is FALSE.  key must be bpt->keySize bytes and val bpt->valSize bytes. */
{
struct udcFile *udc = bpt->udc;
boolean isSwapped = bpt->isSwapped;
bits64 blockOffset = blockStart;

/* Walk from block to block until a leaf settles the question. */
for (;;)
    {
    UBYTE isLeaf;
    UBYTE reserved;
    bits16 ix, childCount;

    /* Block header. */
    udcSeek(udc, blockOffset);
    udcMustReadOne(udc, isLeaf);
    udcMustReadOne(udc, reserved);
    childCount = udcReadBits16(udc, isSwapped);

    UBYTE keyBuf[bpt->keySize];	/* Scratch space for keys read from file. */

    if (isLeaf)
	{
	/* Linear scan of key/value pairs looking for an exact match. */
	for (ix = 0; ix < childCount; ++ix)
	    {
	    udcMustRead(udc, keyBuf, bpt->keySize);
	    udcMustRead(udc, val, bpt->valSize);
	    if (memcmp(key, keyBuf, bpt->keySize) == 0)
		return TRUE;
	    }
	return FALSE;
	}

    /* Internal node: the first key is skipped and its child is the default. */
    udcMustRead(udc, keyBuf, bpt->keySize);
    blockOffset = udcReadBits64(udc, isSwapped);

    /* Move on to later children until one whose key is greater than ours. */
    for (ix = 1; ix < childCount; ++ix)
	{
	udcMustRead(udc, keyBuf, bpt->keySize);
	if (memcmp(key, keyBuf, bpt->keySize) < 0)
	    break;
	blockOffset = udcReadBits64(udc, isSwapped);
	}
    }
}
struct cirTreeFile *cirTreeFileAttach(char *fileName, struct udcFile *udc) /* Open up r-tree index file on previously open file, with cirTree * header at current file position. */ { /* Open file and allocate structure to hold info from header etc. */ struct cirTreeFile *crt = needMem(sizeof(*crt)); crt->fileName = fileName; crt->udc = udc; /* Read magic number at head of file and use it to see if we are proper file type, and * see if we are byte-swapped. */ bits32 magic; boolean isSwapped = FALSE; udcMustReadOne(udc, magic); if (magic != cirTreeSig) { magic = byteSwap32(magic); isSwapped = crt->isSwapped = TRUE; if (magic != cirTreeSig) errAbort("%s is not a chromosome id r-tree index file", fileName); } /* Read rest of defined bits of header, byte swapping as needed. */ crt->blockSize = udcReadBits32(udc, isSwapped); crt->itemCount = udcReadBits64(udc, isSwapped); crt->startChromIx = udcReadBits32(udc, isSwapped); crt->startBase = udcReadBits32(udc, isSwapped); crt->endChromIx = udcReadBits32(udc, isSwapped); crt->endBase = udcReadBits32(udc, isSwapped); crt->fileSize = udcReadBits64(udc, isSwapped); crt->itemsPerSlot = udcReadBits32(udc, isSwapped); /* Skip over reserved bits of header. */ bits32 reserved32; udcMustReadOne(udc, reserved32); /* Save position of root block of r tree. */ crt->rootOffset = udcTell(udc); return crt; }
static void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context,
    void (*callback)(void *context, void *key, int keySize, void *val, int valSize) )
/* Visit the subtree rooted at the block at blockStart, invoking callback once
 * for each key/value pair stored in a leaf.  The key and value pointers
 * handed to callback refer to buffers local to this call, so they are only
 * valid for the duration of the callback. */
{
UBYTE isLeaf;
UBYTE reserved;
bits16 ix, childCount;

/* Block header. */
udcSeek(bpt->udc, blockStart);
udcMustReadOne(bpt->udc, isLeaf);
udcMustReadOne(bpt->udc, reserved);
boolean isSwapped = bpt->isSwapped;
childCount = udcReadBits16(bpt->udc, isSwapped);

char keyBuf[bpt->keySize], valBuf[bpt->valSize];

if (!isLeaf)
    {
    /* Collect every child offset before recursing, because each recursive
     * call seeks elsewhere in the file. */
    bits64 childOffsets[childCount];
    for (ix = 0; ix < childCount; ++ix)
	{
	udcMustRead(bpt->udc, keyBuf, bpt->keySize);
	childOffsets[ix] = udcReadBits64(bpt->udc, isSwapped);
	}
    for (ix = 0; ix < childCount; ++ix)
	rTraverse(bpt, childOffsets[ix], context, callback);
    return;
    }

/* Leaf: hand each stored pair to the callback in file order. */
for (ix = 0; ix < childCount; ++ix)
    {
    udcMustRead(bpt->udc, keyBuf, bpt->keySize);
    udcMustRead(bpt->udc, valBuf, bpt->valSize);
    callback(context, keyBuf, bpt->keySize, valBuf, bpt->valSize);
    }
}
struct crTreeFile *crTreeFileOpen(char *fileName) /* Open up r-tree index file - reading headers and verifying things. */ { /* Open file and allocate structure to hold info from header etc. */ struct udcFile *udc = udcFileOpen(fileName, udcDefaultDir()); struct crTreeFile *crt = needMem(sizeof(*crt)); fileName = crt->fileName = cloneString(fileName); crt->udc = udc; /* Read magic number at head of file and use it to see if we are proper file type, and * see if we are byte-swapped. */ bits32 magic; boolean isSwapped = FALSE; udcMustReadOne(udc, magic); if (magic != crTreeSig) { magic = byteSwap32(magic); isSwapped = crt->isSwapped = TRUE; if (magic != crTreeSig) errAbort("%s is not a chromosome r-tree index file", fileName); } /* Read rest of high level header including notably the offsets to the * chromosome and range indexes. */ bits32 reserved32; udcMustReadOne(udc, reserved32); crt->chromOffset = udcReadBits64(udc, isSwapped); crt->cirOffset = udcReadBits64(udc, isSwapped); /* Read in the chromosome index header. */ udcSeek(udc, crt->chromOffset); crt->chromBpt = bptFileAttach(fileName, udc); /* Read in range index header. */ udcSeek(udc, crt->cirOffset); crt->cir = cirTreeFileAttach(fileName, udc); return crt; }
struct bptFile *bptFileAttach(char *fileName, struct udcFile *udc) /* Open up index file on previously open file, with header at current file position. */ { /* Open file and allocate structure to hold info from header etc. */ struct bptFile *bpt = needMem(sizeof(*bpt)); bpt->fileName = fileName; bpt->udc = udc; /* Read magic number at head of file and use it to see if we are proper file type, and * see if we are byte-swapped. */ bits32 magic; boolean isSwapped = FALSE; udcMustReadOne(udc, magic); if (magic != bptSig) { magic = byteSwap32(magic); isSwapped = bpt->isSwapped = TRUE; if (magic != bptSig) errAbort("%s is not a bpt b-plus tree index file", fileName); } /* Read rest of defined bits of header, byte swapping as needed. */ bpt->blockSize = udcReadBits32(udc, isSwapped); bpt->keySize = udcReadBits32(udc, isSwapped); bpt->valSize = udcReadBits32(udc, isSwapped); bpt->itemCount = udcReadBits64(udc, isSwapped); /* Skip over reserved bits of header. */ bits32 reserved32; udcMustReadOne(udc, reserved32); udcMustReadOne(udc, reserved32); /* Save position of root block of b+ tree. */ bpt->rootOffset = udcTell(udc); return bpt; }
static void rFindMulti(struct bptFile *bpt, bits64 blockStart, void *key, struct slRef **pList)
/* Collect every value stored under key in the subtree rooted at the block at
 * blockStart.  Each match is copied with cloneMem and added to *pList with
 * refAdd, so the caller should release the list with slRefFreeListAndVals()
 * when done with it. */
{
struct udcFile *udc = bpt->udc;
UBYTE isLeaf;
UBYTE reserved;
bits16 ix, childCount;

/* Block header. */
udcSeek(udc, blockStart);
udcMustReadOne(udc, isLeaf);
udcMustReadOne(udc, reserved);
boolean isSwapped = bpt->isSwapped;
childCount = udcReadBits16(udc, isSwapped);

int keySize = bpt->keySize;
UBYTE keyBuf[keySize];		/* Key most recently read from file. */
UBYTE valBuf[bpt->valSize];	/* Value most recently read from file. */

if (isLeaf)
    {
    /* Scan every entry, since the same key may be stored more than once. */
    for (ix = 0; ix < childCount; ++ix)
	{
	udcMustRead(udc, keyBuf, keySize);
	udcMustRead(udc, valBuf, bpt->valSize);
	if (memcmp(key, keyBuf, keySize) != 0)
	    continue;
	void *valCopy = cloneMem(valBuf, bpt->valSize);
	refAdd(pList, valCopy);
	}
    return;
    }

/* Internal node.  Start with the first key and its child offset. */
udcMustRead(udc, keyBuf, keySize);
bits64 prevOffset = udcReadBits64(udc, isSwapped);
bits64 curOffset = prevOffset;
int prevCmp = memcmp(key, keyBuf, keySize);

for (ix = 1; ix < childCount; ++ix)
    {
    udcMustRead(udc, keyBuf, keySize);
    curOffset = udcReadBits64(udc, isSwapped);
    int curCmp = memcmp(key, keyBuf, keySize);

    /* key lies between the previous child's key and this one (inclusive at
     * both ends), so the previous child has to be searched. */
    if (prevCmp >= 0 && curCmp <= 0)
	{
	/* The recursion seeks elsewhere; restore our place afterwards. */
	bits64 resumePos = udcTell(udc);
	rFindMulti(bpt, prevOffset, key, pList);
	udcSeek(udc, resumePos);
	}

    /* Once a key greater than ours is seen there is nothing more to do here. */
    if (curCmp < 0)
	return;

    prevCmp = curCmp;
    prevOffset = curOffset;
    }

/* Ran through all the children without passing key, so search the last one. */
rFindMulti(bpt, curOffset, key, pList);
}
static void rFindOverlappingBlocks(struct cirTreeFile *crt, int level, bits64 indexFileOffset,
    bits32 chromIx, bits32 start, bits32 end, struct fileOffsetSize **retList)
/* Search the index node at indexFileOffset, and recursively the child nodes
 * below it, for entries that cirTreeOverlaps() reports as overlapping
 * chromIx:start-end.  For each overlapping leaf entry a fileOffsetSize with
 * that entry's offset and size is added to the head of *retList.  level is
 * passed along as level+1 on recursion and is not otherwise used here. */
{
struct udcFile *udc = crt->udc;
UBYTE isLeaf;
UBYTE reserved;
bits16 ix, childCount;

/* Node header. */
udcSeek(udc, indexFileOffset);
udcMustReadOne(udc, isLeaf);
udcMustReadOne(udc, reserved);
boolean isSwapped = crt->isSwapped;
childCount = udcReadBits16(udc, isSwapped);

verbose(3, "rFindOverlappingBlocks %llu %u:%u-%u.  childCount %d. isLeaf %d\n", indexFileOffset, chromIx, start, end, (int)childCount, (int)isLeaf);

if (!isLeaf)
    {
    /* Read the whole node into memory first, because recursing moves the
     * file position. */
    struct childBounds
	{
	bits32 startChromIx, startBase;
	bits32 endChromIx, endBase;
	bits64 offset;
	};
    struct childBounds kids[childCount];
    for (ix = 0; ix < childCount; ++ix)
	{
	struct childBounds *kid = &kids[ix];
	kid->startChromIx = udcReadBits32(udc, isSwapped);
	kid->startBase = udcReadBits32(udc, isSwapped);
	kid->endChromIx = udcReadBits32(udc, isSwapped);
	kid->endBase = udcReadBits32(udc, isSwapped);
	kid->offset = udcReadBits64(udc, isSwapped);
	}

    /* Descend only into children whose bounds overlap the query. */
    for (ix = 0; ix < childCount; ++ix)
	{
	struct childBounds *kid = &kids[ix];
	if (!cirTreeOverlaps(chromIx, start, end, kid->startChromIx, kid->startBase,
		kid->endChromIx, kid->endBase))
	    continue;
	rFindOverlappingBlocks(crt, level+1, kid->offset, chromIx, start, end, retList);
	}
    return;
    }

/* Leaf node: every entry is read in full, overlapping ones are recorded. */
for (ix = 0; ix < childCount; ++ix)
    {
    bits32 startChromIx = udcReadBits32(udc, isSwapped);
    bits32 startBase = udcReadBits32(udc, isSwapped);
    bits32 endChromIx = udcReadBits32(udc, isSwapped);
    bits32 endBase = udcReadBits32(udc, isSwapped);
    bits64 offset = udcReadBits64(udc, isSwapped);
    bits64 size = udcReadBits64(udc, isSwapped);
    if (!cirTreeOverlaps(chromIx, start, end, startChromIx, startBase, endChromIx, endBase))
	continue;
    struct fileOffsetSize *block;
    AllocVar(block);
    block->offset = offset;
    block->size = size;
    slAddHead(retList, block);
    }
}
static void bwgDumpReverseFloatBytes(float *pVal)
/* Reverse the byte order of *pVal in place.  Works on the raw bytes so the
 * not-yet-corrected value is never interpreted as a float. */
{
unsigned char *bytes = (unsigned char *)pVal;
int lo = 0, hi = (int)sizeof(*pVal) - 1;
while (lo < hi)
    {
    unsigned char tmp = bytes[lo];
    bytes[lo++] = bytes[hi];
    bytes[hi--] = tmp;
    }
}

static int bigWigBlockDumpIntersectingRange(struct bbiFile *bwf, char *chrom,
    bits32 rangeStart, bits32 rangeEnd, int maxCount, FILE *out)
/* Print to out the items of one section that intersect rangeStart-rangeEnd.
 * The section is read from bwf->udc starting at its current position, which
 * must be at a section header.  Output is bedGraph, variableStep or fixedStep
 * text depending on the section type.  Printing stops early once maxCount
 * items have been written, unless maxCount is 0, which means no limit.
 * Returns the number of items printed.
 *
 * Item values are stored as floats.  They are read as raw bytes and
 * byte-reversed when bwf->isSwapped is set, so that they get the same endian
 * treatment as the integer coordinates read alongside them. */
{
boolean isSwapped = bwf->isSwapped;
struct udcFile *udc = bwf->udc;
struct bwgSectionHead head;
bwgSectionHeadRead(bwf, &head);
bits16 i;
float val;
int outCount = 0;

switch (head.type)
    {
    case bwgTypeBedGraph:
	{
	fprintf(out, "#bedGraph section %s:%u-%u\n",  chrom, head.start, head.end);
	for (i=0; i<head.itemCount; ++i)
	    {
	    bits32 start = udcReadBits32(udc, isSwapped);
	    bits32 end = udcReadBits32(udc, isSwapped);
	    udcMustReadOne(udc, val);
	    if (isSwapped)
		bwgDumpReverseFloatBytes(&val);
	    if (rangeIntersection(rangeStart, rangeEnd, start, end) > 0)
		{
		fprintf(out, "%s\t%u\t%u\t%g\n", chrom, start, end, val);
		++outCount;
		if (maxCount != 0 && outCount >= maxCount)
		    break;
		}
	    }
	break;
	}
    case bwgTypeVariableStep:
	{
	fprintf(out, "variableStep chrom=%s span=%u\n", chrom, head.itemSpan);
	for (i=0; i<head.itemCount; ++i)
	    {
	    bits32 start = udcReadBits32(udc, isSwapped);
	    udcMustReadOne(udc, val);
	    if (isSwapped)
		bwgDumpReverseFloatBytes(&val);
	    if (rangeIntersection(rangeStart, rangeEnd, start, start+head.itemSpan) > 0)
		{
		/* Position is printed one greater than the stored start. */
		fprintf(out, "%u\t%g\n", start+1, val);
		++outCount;
		if (maxCount != 0 && outCount >= maxCount)
		    break;
		}
	    }
	break;
	}
    case bwgTypeFixedStep:
	{
	/* The fixedStep declaration line is deferred until the first item
	 * that is actually printed, so it carries that item's start. */
	boolean gotStart = FALSE;
	bits32 start = head.start;
	for (i=0; i<head.itemCount; ++i)
	    {
	    udcMustReadOne(udc, val);
	    if (isSwapped)
		bwgDumpReverseFloatBytes(&val);
	    if (rangeIntersection(rangeStart, rangeEnd, start, start+head.itemSpan) > 0)
		{
		if (!gotStart)
		    {
		    fprintf(out, "fixedStep chrom=%s start=%u step=%u span=%u\n",
			    chrom, start, head.itemStep, head.itemSpan);
		    gotStart = TRUE;
		    }
		fprintf(out, "%g\n", val);
		++outCount;
		if (maxCount != 0 && outCount >= maxCount)
		    break;
		}
	    start += head.itemStep;
	    }
	break;
	}
    default:
	internalErr();
	break;
    }
return outCount;
}