static void bwgSectionHeadRead(struct bbiFile *bwf, struct bwgSectionHead *head) /* Read section header. */ { struct udcFile *udc = bwf->udc; boolean isSwapped = bwf->isSwapped; head->chromId = udcReadBits32(udc, isSwapped); head->start = udcReadBits32(udc, isSwapped); head->end = udcReadBits32(udc, isSwapped); head->itemStep = udcReadBits32(udc, isSwapped); head->itemSpan = udcReadBits32(udc, isSwapped); head->type = udcGetChar(udc); head->reserved = udcGetChar(udc); head->itemCount = udcReadBits16(udc, isSwapped); }
static void bbiSummaryOnDiskRead(struct bbiFile *bbi, struct bbiSummaryOnDisk *sum) /* Read in summary from file. */ { struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; sum->chromId = udcReadBits32(udc, isSwapped); sum->start = udcReadBits32(udc, isSwapped); sum->end = udcReadBits32(udc, isSwapped); sum->validCount = udcReadBits32(udc, isSwapped); udcMustReadOne(udc, sum->minVal); udcMustReadOne(udc, sum->maxVal); udcMustReadOne(udc, sum->sumData); udcMustReadOne(udc, sum->sumSquares); }
struct bptFile *bptFileAttach(char *fileName, struct udcFile *udc) /* Open up index file on previously open file, with header at current file position. */ { /* Open file and allocate structure to hold info from header etc. */ struct bptFile *bpt = needMem(sizeof(*bpt)); bpt->fileName = fileName; bpt->udc = udc; /* Read magic number at head of file and use it to see if we are proper file type, and * see if we are byte-swapped. */ bits32 magic; boolean isSwapped = FALSE; udcMustReadOne(udc, magic); if (magic != bptSig) { magic = byteSwap32(magic); isSwapped = bpt->isSwapped = TRUE; if (magic != bptSig) errAbort("%s is not a bpt b-plus tree index file", fileName); } /* Read rest of defined bits of header, byte swapping as needed. */ bpt->blockSize = udcReadBits32(udc, isSwapped); bpt->keySize = udcReadBits32(udc, isSwapped); bpt->valSize = udcReadBits32(udc, isSwapped); bpt->itemCount = udcReadBits64(udc, isSwapped); /* Skip over reserved bits of header. */ bits32 reserved32; udcMustReadOne(udc, reserved32); udcMustReadOne(udc, reserved32); /* Save position of root block of b+ tree. */ bpt->rootOffset = udcTell(udc); return bpt; }
struct cirTreeFile *cirTreeFileAttach(char *fileName, struct udcFile *udc) /* Open up r-tree index file on previously open file, with cirTree * header at current file position. */ { /* Open file and allocate structure to hold info from header etc. */ struct cirTreeFile *crt = needMem(sizeof(*crt)); crt->fileName = fileName; crt->udc = udc; /* Read magic number at head of file and use it to see if we are proper file type, and * see if we are byte-swapped. */ bits32 magic; boolean isSwapped = FALSE; udcMustReadOne(udc, magic); if (magic != cirTreeSig) { magic = byteSwap32(magic); isSwapped = crt->isSwapped = TRUE; if (magic != cirTreeSig) errAbort("%s is not a chromosome id r-tree index file", fileName); } /* Read rest of defined bits of header, byte swapping as needed. */ crt->blockSize = udcReadBits32(udc, isSwapped); crt->itemCount = udcReadBits64(udc, isSwapped); crt->startChromIx = udcReadBits32(udc, isSwapped); crt->startBase = udcReadBits32(udc, isSwapped); crt->endChromIx = udcReadBits32(udc, isSwapped); crt->endBase = udcReadBits32(udc, isSwapped); crt->fileSize = udcReadBits64(udc, isSwapped); crt->itemsPerSlot = udcReadBits32(udc, isSwapped); /* Skip over reserved bits of header. */ bits32 reserved32; udcMustReadOne(udc, reserved32); /* Save position of root block of r tree. */ crt->rootOffset = udcTell(udc); return crt; }
struct bbiFile *bbiFileOpenWithDir(char *fileName, bits32 sig, char *typeName, char *udcDir) /* same (mostly) as bbiFileOpen in bbiFile.c, but allows setting the temporary dir */ { struct bbiFile *bbi; AllocVar(bbi); bbi->fileName = cloneString(fileName); struct udcFile *udc = bbi->udc = udcFileOpen(fileName, udcDir); /* Read magic number at head of file and use it to see if we are proper file type, and * see if we are byte-swapped. */ bits32 magic; boolean isSwapped = FALSE; udcMustRead(udc, &magic, sizeof(magic)); if (magic != sig) { magic = byteSwap32(magic); isSwapped = TRUE; if (magic != sig) errAbort("%s is not a %s file", fileName, typeName); } bbi->typeSig = sig; bbi->isSwapped = isSwapped; /* Read rest of defined bits of header, byte swapping as needed. */ bbi->version = udcReadBits16(udc, isSwapped); bbi->zoomLevels = udcReadBits16(udc, isSwapped); bbi->chromTreeOffset = udcReadBits64(udc, isSwapped); bbi->unzoomedDataOffset = udcReadBits64(udc, isSwapped); bbi->unzoomedIndexOffset = udcReadBits64(udc, isSwapped); bbi->fieldCount = udcReadBits16(udc, isSwapped); bbi->definedFieldCount = udcReadBits16(udc, isSwapped); bbi->asOffset = udcReadBits64(udc, isSwapped); bbi->totalSummaryOffset = udcReadBits64(udc, isSwapped); bbi->uncompressBufSize = udcReadBits32(udc, isSwapped); bbi->extensionOffset = udcReadBits64(udc, isSwapped); /* Read zoom headers. */ int i; struct bbiZoomLevel *level, *levelList = NULL; for (i=0; i<bbi->zoomLevels; ++i) { AllocVar(level); level->reductionLevel = udcReadBits32(udc, isSwapped); level->reserved = udcReadBits32(udc, isSwapped); level->dataOffset = udcReadBits64(udc, isSwapped); level->indexOffset = udcReadBits64(udc, isSwapped); slAddHead(&levelList, level); } slReverse(&levelList); bbi->levelList = levelList; /* Deal with header extension if any. */ if (bbi->extensionOffset != 0) { udcSeek(udc, bbi->extensionOffset); bbi->extensionSize = udcReadBits16(udc, isSwapped); bbi->extraIndexCount = udcReadBits16(udc, isSwapped); bbi->extraIndexListOffset = udcReadBits64(udc, isSwapped); } /* Attach B+ tree of chromosome names and ids. */ udcSeek(udc, bbi->chromTreeOffset); bbi->chromBpt = bptFileAttach(fileName, udc); return bbi; }
static void rFindOverlappingBlocks(struct cirTreeFile *crt, int level, bits64 indexFileOffset, bits32 chromIx, bits32 start, bits32 end, struct fileOffsetSize **retList) /* Recursively find blocks with data. */ { struct udcFile *udc = crt->udc; /* Seek to start of block. */ udcSeek(udc, indexFileOffset); /* Read block header. */ UBYTE isLeaf; UBYTE reserved; bits16 i, childCount; udcMustReadOne(udc, isLeaf); udcMustReadOne(udc, reserved); boolean isSwapped = crt->isSwapped; childCount = udcReadBits16(udc, isSwapped); verbose(3, "rFindOverlappingBlocks %llu %u:%u-%u. childCount %d. isLeaf %d\n", indexFileOffset, chromIx, start, end, (int)childCount, (int)isLeaf); if (isLeaf) { /* Loop through node adding overlapping leaves to block list. */ for (i=0; i<childCount; ++i) { bits32 startChromIx = udcReadBits32(udc, isSwapped); bits32 startBase = udcReadBits32(udc, isSwapped); bits32 endChromIx = udcReadBits32(udc, isSwapped); bits32 endBase = udcReadBits32(udc, isSwapped); bits64 offset = udcReadBits64(udc, isSwapped); bits64 size = udcReadBits64(udc, isSwapped); if (cirTreeOverlaps(chromIx, start, end, startChromIx, startBase, endChromIx, endBase)) { struct fileOffsetSize *block; AllocVar(block); block->offset = offset; block->size = size; slAddHead(retList, block); } } } else { /* Read node into arrays. */ bits32 startChromIx[childCount], startBase[childCount]; bits32 endChromIx[childCount], endBase[childCount]; bits64 offset[childCount]; for (i=0; i<childCount; ++i) { startChromIx[i] = udcReadBits32(udc, isSwapped); startBase[i] = udcReadBits32(udc, isSwapped); endChromIx[i] = udcReadBits32(udc, isSwapped); endBase[i] = udcReadBits32(udc, isSwapped); offset[i] = udcReadBits64(udc, isSwapped); } /* Recurse into child nodes that we overlap. */ for (i=0; i<childCount; ++i) { if (cirTreeOverlaps(chromIx, start, end, startChromIx[i], startBase[i], endChromIx[i], endBase[i])) { rFindOverlappingBlocks(crt, level+1, offset[i], chromIx, start, end, retList); } } } }
static int bigWigBlockDumpIntersectingRange(struct bbiFile *bwf, char *chrom, bits32 rangeStart, bits32 rangeEnd, int maxCount, FILE *out) /* Print out info on parts of block that intersect start-end, block starting at current position. */ { boolean isSwapped = bwf->isSwapped; struct udcFile *udc = bwf->udc; struct bwgSectionHead head; bwgSectionHeadRead(bwf, &head); bits16 i; float val; int outCount = 0; switch (head.type) { case bwgTypeBedGraph: { fprintf(out, "#bedGraph section %s:%u-%u\n", chrom, head.start, head.end); for (i=0; i<head.itemCount; ++i) { bits32 start = udcReadBits32(udc, isSwapped); bits32 end = udcReadBits32(udc, isSwapped); udcMustReadOne(udc, val); if (rangeIntersection(rangeStart, rangeEnd, start, end) > 0) { fprintf(out, "%s\t%u\t%u\t%g\n", chrom, start, end, val); ++outCount; if (maxCount != 0 && outCount >= maxCount) break; } } break; } case bwgTypeVariableStep: { fprintf(out, "variableStep chrom=%s span=%u\n", chrom, head.itemSpan); for (i=0; i<head.itemCount; ++i) { bits32 start = udcReadBits32(udc, isSwapped); udcMustReadOne(udc, val); if (rangeIntersection(rangeStart, rangeEnd, start, start+head.itemSpan) > 0) { fprintf(out, "%u\t%g\n", start+1, val); ++outCount; if (maxCount != 0 && outCount >= maxCount) break; } } break; } case bwgTypeFixedStep: { boolean gotStart = FALSE; bits32 start = head.start; for (i=0; i<head.itemCount; ++i) { udcMustReadOne(udc, val); if (rangeIntersection(rangeStart, rangeEnd, start, start+head.itemSpan) > 0) { if (!gotStart) { fprintf(out, "fixedStep chrom=%s start=%u step=%u span=%u\n", chrom, start, head.itemStep, head.itemSpan); gotStart = TRUE; } fprintf(out, "%g\n", val); ++outCount; if (maxCount != 0 && outCount >= maxCount) break; } start += head.itemStep; } break; } default: internalErr(); break; } return outCount; }
struct bbiSummaryElement bbiTotalSummary(struct bbiFile *bbi) /* Return summary of entire file! */ { struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; struct bbiSummaryElement res; ZeroVar(&res); if (bbi->totalSummaryOffset != 0) { udcSeek(udc, bbi->totalSummaryOffset); res.validCount = udcReadBits64(udc, isSwapped); res.minVal = udcReadDouble(udc, isSwapped); res.maxVal = udcReadDouble(udc, isSwapped); res.sumData = udcReadDouble(udc, isSwapped); res.sumSquares = udcReadDouble(udc, isSwapped); } else if (bbi->version == 1) /* Require version 1 so as not to have to deal with compression. Should not happen * to have NULL totalSummaryOffset for non-empty version 2+ file anyway. */ { /* Find most extreme zoom. */ struct bbiZoomLevel *bestZoom = NULL, *zoom; bits32 bestReduction = 0; for (zoom = bbi->levelList; zoom != NULL; zoom = zoom->next) { if (zoom->reductionLevel > bestReduction) { bestReduction = zoom->reductionLevel; bestZoom = zoom; } } if (bestZoom != NULL) { udcSeek(udc, bestZoom->dataOffset); bits32 zoomSectionCount = udcReadBits32(udc, isSwapped); bits32 i; for (i=0; i<zoomSectionCount; ++i) { /* Read, but ignore, position. */ bits32 chromId, chromStart, chromEnd; chromId = udcReadBits32(udc, isSwapped); chromStart = udcReadBits32(udc, isSwapped); chromEnd = udcReadBits32(udc, isSwapped); /* First time through set values, rest of time add to them. */ if (i == 0) { res.validCount = udcReadBits32(udc, isSwapped); res.minVal = udcReadFloat(udc, isSwapped); res.maxVal = udcReadFloat(udc, isSwapped); res.sumData = udcReadFloat(udc, isSwapped); res.sumSquares = udcReadFloat(udc, isSwapped); } else { res.validCount += udcReadBits32(udc, isSwapped); float minVal = udcReadFloat(udc, isSwapped); if (minVal < res.minVal) res.minVal = minVal; float maxVal = udcReadFloat(udc, isSwapped); if (maxVal > res.maxVal) res.maxVal = maxVal; res.sumData += udcReadFloat(udc, isSwapped); res.sumSquares += udcReadFloat(udc, isSwapped); } } } } return res; }
struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName) /* Open up big wig or big bed file. */ { /* This code needs to agree with code in two other places currently - bigBedFileCreate, * and bigWigFileCreate. I'm thinking of refactoring to share at least between * bigBedFileCreate and bigWigFileCreate. It'd be great so it could be structured * so that it could send the input in one chromosome at a time, and send in the zoom * stuff only after all the chromosomes are done. This'd potentially reduce the memory * footprint by a factor of 2 or 4. Still, for now it works. -JK */ struct bbiFile *bbi; AllocVar(bbi); bbi->fileName = cloneString(fileName); struct udcFile *udc = bbi->udc = udcFileOpen(fileName, udcDefaultDir()); /* Read magic number at head of file and use it to see if we are proper file type, and * see if we are byte-swapped. */ bits32 magic; boolean isSwapped = FALSE; udcMustRead(udc, &magic, sizeof(magic)); if (magic != sig) { magic = byteSwap32(magic); isSwapped = TRUE; if (magic != sig) errAbort("%s is not a %s file", fileName, typeName); } bbi->typeSig = sig; bbi->isSwapped = isSwapped; /* Read rest of defined bits of header, byte swapping as needed. */ bbi->version = udcReadBits16(udc, isSwapped); bbi->zoomLevels = udcReadBits16(udc, isSwapped); bbi->chromTreeOffset = udcReadBits64(udc, isSwapped); bbi->unzoomedDataOffset = udcReadBits64(udc, isSwapped); bbi->unzoomedIndexOffset = udcReadBits64(udc, isSwapped); bbi->fieldCount = udcReadBits16(udc, isSwapped); bbi->definedFieldCount = udcReadBits16(udc, isSwapped); bbi->asOffset = udcReadBits64(udc, isSwapped); bbi->totalSummaryOffset = udcReadBits64(udc, isSwapped); bbi->uncompressBufSize = udcReadBits32(udc, isSwapped); /* Skip over reserved area. */ udcSeek(udc, 64); /* Read zoom headers. */ int i; struct bbiZoomLevel *level, *levelList = NULL; for (i=0; i<bbi->zoomLevels; ++i) { AllocVar(level); level->reductionLevel = udcReadBits32(udc, isSwapped); level->reserved = udcReadBits32(udc, isSwapped); level->dataOffset = udcReadBits64(udc, isSwapped); level->indexOffset = udcReadBits64(udc, isSwapped); slAddHead(&levelList, level); } slReverse(&levelList); bbi->levelList = levelList; /* Attach B+ tree of chromosome names and ids. */ udcSeek(udc, bbi->chromTreeOffset); bbi->chromBpt = bptFileAttach(fileName, udc); return bbi; }