static void bwgSectionHeadRead(struct bbiFile *bwf, struct bwgSectionHead *head)
/* Fill in *head from the section header found at the current position of
 * bwf's udc stream.  Fields are consumed in this order: chromId, start, end,
 * itemStep, itemSpan, type, reserved, itemCount.  The 32 and 16 bit reads
 * are handed bwf->isSwapped so they can be byte-swapped as needed. */
{
boolean swap = bwf->isSwapped;
struct udcFile *in = bwf->udc;

/* Position and step/span information, all read as 32 bit values. */
head->chromId = udcReadBits32(in, swap);
head->start = udcReadBits32(in, swap);
head->end = udcReadBits32(in, swap);
head->itemStep = udcReadBits32(in, swap);
head->itemSpan = udcReadBits32(in, swap);

/* Two values fetched with udcGetChar, which takes no swap flag. */
head->type = udcGetChar(in);
head->reserved = udcGetChar(in);

/* Number of items in the section, read as a 16 bit value. */
head->itemCount = udcReadBits16(in, swap);
}
static void bbiSummaryOnDiskRead(struct bbiFile *bbi, struct bbiSummaryOnDisk *sum)
/* Read one summary record from the current position of bbi's udc stream
 * into *sum.  Fields are consumed in this order: chromId, start, end,
 * validCount, minVal, maxVal, sumData, sumSquares.  Every field is read
 * through a reader that is handed bbi->isSwapped. */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;

sum->chromId = udcReadBits32(udc, isSwapped);
sum->start = udcReadBits32(udc, isSwapped);
sum->end = udcReadBits32(udc, isSwapped);
sum->validCount = udcReadBits32(udc, isSwapped);

/* These used to be copied in raw with udcMustReadOne, which ignores
 * isSwapped, so on a file of the opposite byte order the integer fields
 * above were swapped but these four were not.  Go through udcReadFloat so
 * they get the same treatment.
 * NOTE(review): assumes the four fields are 4-byte floats on disk and in
 * struct bbiSummaryOnDisk - confirm against the struct declaration. */
sum->minVal = udcReadFloat(udc, isSwapped);
sum->maxVal = udcReadFloat(udc, isSwapped);
sum->sumData = udcReadFloat(udc, isSwapped);
sum->sumSquares = udcReadFloat(udc, isSwapped);
}
Ejemplo n.º 3
0
struct bptFile *bptFileAttach(char *fileName, struct udcFile *udc)
/* Create a bptFile for a b-plus tree index whose header starts at the
 * current position of the already open udc.  The fileName pointer is stored
 * as is (not copied) and is used in the error message.  Calls errAbort if
 * the signature does not match bptSig in either byte order.  On return the
 * stream is positioned just past the header, and that position is recorded
 * as the root offset. */
{
struct bptFile *bpt = needMem(sizeof(*bpt));
bpt->udc = udc;
bpt->fileName = fileName;

/* The signature doubles as a byte order probe: if it only matches after
 * swapping then every later field needs swapping too. */
bits32 sig;
udcMustReadOne(udc, sig);
boolean swap = FALSE;
if (sig != bptSig)
    {
    sig = byteSwap32(sig);
    swap = TRUE;
    bpt->isSwapped = TRUE;
    if (sig != bptSig)
        errAbort("%s is not a bpt b-plus tree index file", fileName);
    }

/* Defined part of the header. */
bpt->blockSize = udcReadBits32(udc, swap);
bpt->keySize = udcReadBits32(udc, swap);
bpt->valSize = udcReadBits32(udc, swap);
bpt->itemCount = udcReadBits64(udc, swap);

/* Two reserved 32 bit words: read them only to move past them. */
bits32 unused;
udcMustReadOne(udc, unused);
udcMustReadOne(udc, unused);

/* Whatever follows the header is the root block of the tree. */
bpt->rootOffset = udcTell(udc);
return bpt;
}
Ejemplo n.º 4
0
struct cirTreeFile *cirTreeFileAttach(char *fileName, struct udcFile *udc)
/* Create a cirTreeFile for an r-tree index whose header starts at the
 * current position of the already open udc.  The fileName pointer is stored
 * as is (not copied) and is used in the error message.  Calls errAbort if
 * the signature does not match cirTreeSig in either byte order.  On return
 * the stream is positioned just past the header, and that position is
 * recorded as the root offset. */
{
struct cirTreeFile *crt = needMem(sizeof(*crt));
crt->udc = udc;
crt->fileName = fileName;

/* The signature doubles as a byte order probe: if it only matches after
 * swapping then every later field needs swapping too. */
bits32 sig;
udcMustReadOne(udc, sig);
boolean swap = FALSE;
if (sig != cirTreeSig)
    {
    sig = byteSwap32(sig);
    swap = TRUE;
    crt->isSwapped = TRUE;
    if (sig != cirTreeSig)
        errAbort("%s is not a chromosome id r-tree index file", fileName);
    }

/* Defined part of the header, in on-disk order. */
crt->blockSize = udcReadBits32(udc, swap);
crt->itemCount = udcReadBits64(udc, swap);
crt->startChromIx = udcReadBits32(udc, swap);
crt->startBase = udcReadBits32(udc, swap);
crt->endChromIx = udcReadBits32(udc, swap);
crt->endBase = udcReadBits32(udc, swap);
crt->fileSize = udcReadBits64(udc, swap);
crt->itemsPerSlot = udcReadBits32(udc, swap);

/* One reserved 32 bit word: read it only to move past it. */
bits32 unused;
udcMustReadOne(udc, unused);

/* Whatever follows the header is the root block of the tree. */
crt->rootOffset = udcTell(udc);
return crt;
}
Ejemplo n.º 5
0
struct bbiFile *bbiFileOpenWithDir(char *fileName, bits32 sig, char *typeName, char *udcDir)
/* Open a big wig or big bed style file through udc, passing udcDir on to
 * udcFileOpen, and read its header.
 *   fileName - what to open; a clone of it is stored in the result
 *   sig      - signature the file must start with, in either byte order
 *   typeName - only used in the error message when the signature is wrong
 *   udcDir   - second argument for udcFileOpen
 * Returns the newly allocated bbiFile with header fields, zoom level list,
 * optional header extension fields and chromosome b-plus tree filled in.
 * Calls errAbort if the signature does not match. */
{
    struct bbiFile *bbi;
    AllocVar(bbi);
    bbi->fileName = cloneString(fileName);
    bbi->udc = udcFileOpen(fileName, udcDir);
    struct udcFile *in = bbi->udc;

    /* The signature doubles as a byte order probe: a mismatch means either
     * the file is byte-swapped or it is not the expected type at all. */
    bits32 fileSig;
    udcMustRead(in, &fileSig, sizeof(fileSig));
    boolean swap = (fileSig != sig);
    if (swap && byteSwap32(fileSig) != sig)
        errAbort("%s is not a %s file", fileName, typeName);
    bbi->typeSig = sig;
    bbi->isSwapped = swap;

    /* Fixed part of the header, in on-disk order. */
    bbi->version = udcReadBits16(in, swap);
    bbi->zoomLevels = udcReadBits16(in, swap);
    bbi->chromTreeOffset = udcReadBits64(in, swap);
    bbi->unzoomedDataOffset = udcReadBits64(in, swap);
    bbi->unzoomedIndexOffset = udcReadBits64(in, swap);
    bbi->fieldCount = udcReadBits16(in, swap);
    bbi->definedFieldCount = udcReadBits16(in, swap);
    bbi->asOffset = udcReadBits64(in, swap);
    bbi->totalSummaryOffset = udcReadBits64(in, swap);
    bbi->uncompressBufSize = udcReadBits32(in, swap);
    bbi->extensionOffset = udcReadBits64(in, swap);

    /* Zoom headers follow directly.  Each is pushed on the front of the
     * list, so one reversal at the end restores file order. */
    struct bbiZoomLevel *zoomList = NULL;
    int levelCount = bbi->zoomLevels;
    int levelIx;
    for (levelIx = 0; levelIx < levelCount; ++levelIx)
    {
        struct bbiZoomLevel *zoom;
        AllocVar(zoom);
        zoom->reductionLevel = udcReadBits32(in, swap);
        zoom->reserved = udcReadBits32(in, swap);
        zoom->dataOffset = udcReadBits64(in, swap);
        zoom->indexOffset = udcReadBits64(in, swap);
        slAddHead(&zoomList, zoom);
    }
    slReverse(&zoomList);
    bbi->levelList = zoomList;

    /* A zero extension offset means there is no header extension. */
    if (bbi->extensionOffset != 0)
    {
        udcSeek(in, bbi->extensionOffset);
        bbi->extensionSize = udcReadBits16(in, swap);
        bbi->extraIndexCount = udcReadBits16(in, swap);
        bbi->extraIndexListOffset = udcReadBits64(in, swap);
    }

    /* Attach b-plus tree of chromosome names and ids. */
    udcSeek(in, bbi->chromTreeOffset);
    bbi->chromBpt = bptFileAttach(fileName, in);

    return bbi;
}
Ejemplo n.º 6
0
static void rFindOverlappingBlocks(struct cirTreeFile *crt, int level, bits64 indexFileOffset,
        bits32 chromIx, bits32 start, bits32 end, struct fileOffsetSize **retList)
/* Recursively find blocks with data overlapping the query range.
 *   crt             - index being searched; its udc stream is repositioned here
 *   level           - depth of this node; only passed on, plus one, when recursing
 *   indexFileOffset - file offset of the index node to examine
 *   chromIx, start, end - query range, handed to cirTreeOverlaps
 *   retList         - each overlapping leaf item is pushed on the head of this
 *                     list as a newly allocated fileOffsetSize
 * A node with no children contributes nothing and is returned from early. */
{
struct udcFile *udc = crt->udc;

/* Seek to start of block. */
udcSeek(udc, indexFileOffset);

/* Read block header. */
UBYTE isLeaf;
UBYTE reserved;
bits16 i, childCount;
udcMustReadOne(udc, isLeaf);
udcMustReadOne(udc, reserved);
boolean isSwapped = crt->isSwapped;
childCount = udcReadBits16(udc, isSwapped);

verbose(3, "rFindOverlappingBlocks %llu %u:%u-%u.  childCount %d. isLeaf %d\n", indexFileOffset, chromIx, start, end, (int)childCount, (int)isLeaf);

/* childCount comes straight from the file.  Zero would make the variable
 * length arrays in the non-leaf branch have zero size, which C does not
 * allow, and neither branch has any work to do for an empty node. */
if (childCount == 0)
    return;

if (isLeaf)
    {
    /* Loop through node adding overlapping leaves to block list. */
    for (i=0; i<childCount; ++i)
        {
        bits32 startChromIx = udcReadBits32(udc, isSwapped);
        bits32 startBase = udcReadBits32(udc, isSwapped);
        bits32 endChromIx = udcReadBits32(udc, isSwapped);
        bits32 endBase = udcReadBits32(udc, isSwapped);
        bits64 offset = udcReadBits64(udc, isSwapped);
        bits64 size = udcReadBits64(udc, isSwapped);
        if (cirTreeOverlaps(chromIx, start, end, startChromIx, startBase, endChromIx, endBase))
            {
            struct fileOffsetSize *block;
            AllocVar(block);
            block->offset = offset;
            block->size = size;
            slAddHead(retList, block);
            }
        }
    }
else
    {
    /* Read the whole node into arrays before recursing, since each
     * recursive call seeks the shared udc stream somewhere else. */
    bits32 startChromIx[childCount], startBase[childCount];
    bits32 endChromIx[childCount], endBase[childCount];
    bits64 offset[childCount];
    for (i=0; i<childCount; ++i)
        {
        startChromIx[i] = udcReadBits32(udc, isSwapped);
        startBase[i] = udcReadBits32(udc, isSwapped);
        endChromIx[i] = udcReadBits32(udc, isSwapped);
        endBase[i] = udcReadBits32(udc, isSwapped);
        offset[i] = udcReadBits64(udc, isSwapped);
        }

    /* Recurse into child nodes that we overlap. */
    for (i=0; i<childCount; ++i)
        {
        if (cirTreeOverlaps(chromIx, start, end, startChromIx[i], startBase[i],
                endChromIx[i], endBase[i]))
            {
            rFindOverlappingBlocks(crt, level+1, offset[i], chromIx, start, end, retList);
            }
        }
    }
}
Ejemplo n.º 7
0
static int bigWigBlockDumpIntersectingRange(struct bbiFile *bwf, char *chrom,
        bits32 rangeStart, bits32 rangeEnd, int maxCount, FILE *out)
/* Print out info on parts of block that intersect rangeStart-rangeEnd, block
 * starting at current position of bwf's udc stream.
 *   bwf      - open file; supplies the stream and the byte-swap flag
 *   chrom    - chromosome name, only used in the printed text
 *   maxCount - stop after this many items have been printed; 0 means no limit
 *   out      - where the text goes
 * Returns the number of items printed.  When the maxCount limit stops the
 * loop early, the rest of the block is left unread.  Calls internalErr on a
 * section type that is none of the three handled here. */
{
boolean isSwapped = bwf->isSwapped;
struct udcFile *udc = bwf->udc;
struct bwgSectionHead head;
bwgSectionHeadRead(bwf, &head);
bits16 i;
float val;
int outCount = 0;

/* In all three section types the value is read with udcReadFloat rather
 * than a raw udcMustReadOne, so that it is byte-swapped along with the
 * integer fields when the file is of the opposite byte order. */
switch (head.type)
    {
    case bwgTypeBedGraph:
        {
        fprintf(out, "#bedGraph section %s:%u-%u\n",  chrom, head.start, head.end);
        for (i=0; i<head.itemCount; ++i)
            {
            bits32 start = udcReadBits32(udc, isSwapped);
            bits32 end = udcReadBits32(udc, isSwapped);
            val = udcReadFloat(udc, isSwapped);
            if (rangeIntersection(rangeStart, rangeEnd, start, end) > 0)
                {
                fprintf(out, "%s\t%u\t%u\t%g\n", chrom, start, end, val);
                ++outCount;
                if (maxCount != 0 && outCount >= maxCount)
                    break;
                }
            }
        break;
        }
    case bwgTypeVariableStep:
        {
        fprintf(out, "variableStep chrom=%s span=%u\n", chrom, head.itemSpan);
        for (i=0; i<head.itemCount; ++i)
            {
            bits32 start = udcReadBits32(udc, isSwapped);
            val = udcReadFloat(udc, isSwapped);
            if (rangeIntersection(rangeStart, rangeEnd, start, start+head.itemSpan) > 0)
                {
                /* Printed position is start+1. */
                fprintf(out, "%u\t%g\n", start+1, val);
                ++outCount;
                if (maxCount != 0 && outCount >= maxCount)
                    break;
                }
            }
        break;
        }
    case bwgTypeFixedStep:
        {
        /* The declaration line is only printed once something intersects,
         * using the start of the first intersecting item. */
        boolean gotStart = FALSE;
        bits32 start = head.start;
        for (i=0; i<head.itemCount; ++i)
            {
            val = udcReadFloat(udc, isSwapped);
            if (rangeIntersection(rangeStart, rangeEnd, start, start+head.itemSpan) > 0)
                {
                if (!gotStart)
                    {
                    fprintf(out, "fixedStep chrom=%s start=%u step=%u span=%u\n",
                            chrom, start, head.itemStep, head.itemSpan);
                    gotStart = TRUE;
                    }
                fprintf(out, "%g\n", val);
                ++outCount;
                if (maxCount != 0 && outCount >= maxCount)
                    break;
                }
            start += head.itemStep;
            }
        break;
        }
    default:
        internalErr();
        break;
    }
return outCount;
}
struct bbiSummaryElement bbiTotalSummary(struct bbiFile *bbi)
/* Return summary of entire file! */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct bbiSummaryElement res;
ZeroVar(&res);

if (bbi->totalSummaryOffset != 0)
    {
    udcSeek(udc, bbi->totalSummaryOffset);
    res.validCount = udcReadBits64(udc, isSwapped);
    res.minVal = udcReadDouble(udc, isSwapped);
    res.maxVal = udcReadDouble(udc, isSwapped);
    res.sumData = udcReadDouble(udc, isSwapped);
    res.sumSquares = udcReadDouble(udc, isSwapped);
    }
else if (bbi->version == 1)
    /* Require version 1 so as not to have to deal with compression.  Should not happen
     * to have NULL totalSummaryOffset for non-empty version 2+ file anyway. */
    {
    /* Find most extreme zoom. */
    struct bbiZoomLevel *bestZoom = NULL, *zoom;
    bits32 bestReduction = 0;
    for (zoom = bbi->levelList; zoom != NULL; zoom = zoom->next)
	{
	if (zoom->reductionLevel > bestReduction)
	    {
	    bestReduction = zoom->reductionLevel;
	    bestZoom = zoom;
	    }
	}

    if (bestZoom != NULL)
	{
	udcSeek(udc, bestZoom->dataOffset);
	bits32 zoomSectionCount = udcReadBits32(udc, isSwapped);
	bits32 i;
	for (i=0; i<zoomSectionCount; ++i)
	    {
	    /* Read, but ignore, position. */
	    bits32 chromId, chromStart, chromEnd;
	    chromId = udcReadBits32(udc, isSwapped);
	    chromStart = udcReadBits32(udc, isSwapped);
	    chromEnd = udcReadBits32(udc, isSwapped);

	    /* First time through set values, rest of time add to them. */
	    if (i == 0)
		{
		res.validCount = udcReadBits32(udc, isSwapped);
		res.minVal = udcReadFloat(udc, isSwapped);
		res.maxVal = udcReadFloat(udc, isSwapped);
		res.sumData = udcReadFloat(udc, isSwapped);
		res.sumSquares = udcReadFloat(udc, isSwapped);
		}
	    else
		{
		res.validCount += udcReadBits32(udc, isSwapped);
		float minVal = udcReadFloat(udc, isSwapped);
		if (minVal < res.minVal) res.minVal = minVal;
		float maxVal = udcReadFloat(udc, isSwapped);
		if (maxVal > res.maxVal) res.maxVal = maxVal;
		res.sumData += udcReadFloat(udc, isSwapped);
		res.sumSquares += udcReadFloat(udc, isSwapped);
		}
	    }
	}
    }
return res;
}
struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName)
/* Open up big wig or big bed file through udc, using udcDefaultDir(), and
 * read its header.
 *   fileName - what to open; a clone of it is stored in the result
 *   sig      - signature the file must start with, in either byte order
 *   typeName - only used in the error message when the signature is wrong
 * Returns the newly allocated bbiFile with header fields, zoom level list
 * and chromosome b-plus tree filled in.  Calls errAbort if the signature
 * does not match.
 *
 * The layout read here needs to agree with the code that writes it,
 * currently bigBedFileCreate and bigWigFileCreate. */
{
struct bbiFile *bbi;
AllocVar(bbi);
bbi->fileName = cloneString(fileName);
bbi->udc = udcFileOpen(fileName, udcDefaultDir());
struct udcFile *in = bbi->udc;

/* The signature doubles as a byte order probe: a mismatch means either the
 * file is byte-swapped or it is not the expected type at all. */
bits32 fileSig;
udcMustRead(in, &fileSig, sizeof(fileSig));
boolean swap = (fileSig != sig);
if (swap && byteSwap32(fileSig) != sig)
    errAbort("%s is not a %s file", fileName, typeName);
bbi->typeSig = sig;
bbi->isSwapped = swap;

/* Fixed part of the header, in on-disk order. */
bbi->version = udcReadBits16(in, swap);
bbi->zoomLevels = udcReadBits16(in, swap);
bbi->chromTreeOffset = udcReadBits64(in, swap);
bbi->unzoomedDataOffset = udcReadBits64(in, swap);
bbi->unzoomedIndexOffset = udcReadBits64(in, swap);
bbi->fieldCount = udcReadBits16(in, swap);
bbi->definedFieldCount = udcReadBits16(in, swap);
bbi->asOffset = udcReadBits64(in, swap);
bbi->totalSummaryOffset = udcReadBits64(in, swap);
bbi->uncompressBufSize = udcReadBits32(in, swap);

/* Skip over reserved area; the zoom headers are read from offset 64. */
udcSeek(in, 64);

/* Each zoom header is pushed on the front of the list, so one reversal at
 * the end restores file order. */
struct bbiZoomLevel *zoomList = NULL;
int levelCount = bbi->zoomLevels;
int levelIx;
for (levelIx = 0; levelIx < levelCount; ++levelIx)
    {
    struct bbiZoomLevel *zoom;
    AllocVar(zoom);
    zoom->reductionLevel = udcReadBits32(in, swap);
    zoom->reserved = udcReadBits32(in, swap);
    zoom->dataOffset = udcReadBits64(in, swap);
    zoom->indexOffset = udcReadBits64(in, swap);
    slAddHead(&zoomList, zoom);
    }
slReverse(&zoomList);
bbi->levelList = zoomList;

/* Attach b-plus tree of chromosome names and ids. */
udcSeek(in, bbi->chromTreeOffset);
bbi->chromBpt = bptFileAttach(fileName, in);

return bbi;
}