예제 #1
0
static boolean rFind(struct bptFile *bpt, bits64 blockStart, void *key, void *val)
/* Find value corresponding to key.  If found copy value to memory pointed to by val and return 
 * true. Otherwise return false. */
{
/* Seek to start of block. */
udcSeek(bpt->udc, blockStart);

/* Read block header. */
UBYTE isLeaf;
UBYTE reserved;
bits16 i, childCount;
udcMustReadOne(bpt->udc, isLeaf);
udcMustReadOne(bpt->udc, reserved);
boolean isSwapped = bpt->isSwapped;
childCount = udcReadBits16(bpt->udc, isSwapped);

UBYTE keyBuf[bpt->keySize];   /* Place to put a key, buffered on stack. */

if (isLeaf)
    {
    for (i=0; i<childCount; ++i)
        {
	udcMustRead(bpt->udc, keyBuf, bpt->keySize);
	udcMustRead(bpt->udc, val, bpt->valSize);
	if (memcmp(key, keyBuf, bpt->keySize) == 0)
	    return TRUE;
	}
    return FALSE;
    }
else
    {
    /* Read and discard first key. */
    udcMustRead(bpt->udc, keyBuf, bpt->keySize);

    /* Scan info for first file offset. */
    bits64 fileOffset = udcReadBits64(bpt->udc, isSwapped);

    /* Loop through remainder. */
    for (i=1; i<childCount; ++i)
	{
	udcMustRead(bpt->udc, keyBuf, bpt->keySize);
	if (memcmp(key, keyBuf, bpt->keySize) < 0)
	    break;
	fileOffset = udcReadBits64(bpt->udc, isSwapped);
	}
    return rFind(bpt, fileOffset, key, val);
    }
}
예제 #2
0
static bits64 bptDataStart(struct bptFile *bpt)
/* Return offset of first bit of data (as opposed to index) in file.  In hind sight I wish
 * this were stored in the header, but fortunately it's not that hard to compute. */
{
bits64 offset = bpt->rootOffset;
for (;;)
    {
    /* Seek to block start */
    udcSeek(bpt->udc, offset);

    /* Read block header,  break if we are leaf. */
    UBYTE isLeaf;
    UBYTE reserved;
    bits16 childCount;
    udcMustReadOne(bpt->udc, isLeaf);
    if (isLeaf)
         break;
    udcMustReadOne(bpt->udc, reserved);
    boolean isSwapped = bpt->isSwapped;
    childCount = udcReadBits16(bpt->udc, isSwapped);

    /* Read and discard first key. */
    char keyBuf[bpt->keySize];
    udcMustRead(bpt->udc, keyBuf, bpt->keySize);

    /* Get file offset of sub-block. */
    offset = udcReadBits64(bpt->udc, isSwapped);
    }
return offset;
}
예제 #3
0
파일: bigBed.c 프로젝트: cestmoi7/AGAPE
struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName, int *retFieldIx)
/* Return index associated with fieldName.  Aborts if no such index.  Optionally return
 * index in a row of this field. */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct asObject *as = bigBedAsOrDefault(bbi);
struct asColumn *col = asColumnFind(as, fieldName);
if (col == NULL)
    errAbort("No field %s in %s", fieldName, bbi->fileName);
int colIx = slIxFromElement(as->columnList, col);
if (retFieldIx != NULL)
   *retFieldIx = colIx;
asObjectFree(&as);

/* See if we have any extra indexes, and if so seek to there. */
bits64 offset = bbi->extraIndexListOffset;
if (offset == 0)
   errAbort("%s has no indexes", bbi->fileName);
udcSeek(udc, offset);

/* Go through each extra index and see if it's a match */
int i;
for (i=0; i<bbi->extraIndexCount; ++i)
    {
    bits16 type = udcReadBits16(udc, isSwapped);
    bits16 fieldCount = udcReadBits16(udc, isSwapped);
    bits64 fileOffset = udcReadBits64(udc, isSwapped);
    udcSeekCur(udc, 4);    // skip over reserved bits

    if (type != 0)
        {
	warn("Don't understand type %d", type);
	internalErr();
	}
    if (fieldCount == 1)
        {
	bits16 fieldId = udcReadBits16(udc, isSwapped);
	udcSeekCur(udc, 2);    // skip over reserved bits
	if (fieldId == colIx)
	    {
	    udcSeek(udc, fileOffset);
	    struct bptFile *bpt = bptFileAttach(bbi->fileName, udc);
	    return bpt;
	    }
	}
    else
        {
	warn("Not yet understanding indexes on multiple fields at once.");
	internalErr();
	}
    }

errAbort("%s is not indexed in %s", fieldName, bbi->fileName);
return NULL;
}
예제 #4
0
struct cirTreeFile *cirTreeFileAttach(char *fileName, struct udcFile *udc)
/* Open up r-tree index file on previously open file, with cirTree
 * header at current file position. */
{
/* Open file and allocate structure to hold info from header etc. */
struct cirTreeFile *crt = needMem(sizeof(*crt));
crt->fileName = fileName;
crt->udc = udc;

/* Read magic number at head of file and use it to see if we are proper file type, and
 * see if we are byte-swapped. */
bits32 magic;
boolean isSwapped = FALSE;
udcMustReadOne(udc, magic);
if (magic != cirTreeSig)
    {
    magic = byteSwap32(magic);
    isSwapped = crt->isSwapped = TRUE;
    if (magic != cirTreeSig)
       errAbort("%s is not a chromosome id r-tree index file", fileName);
    }

/* Read rest of defined bits of header, byte swapping as needed. */
crt->blockSize = udcReadBits32(udc, isSwapped);
crt->itemCount = udcReadBits64(udc, isSwapped);
crt->startChromIx = udcReadBits32(udc, isSwapped);
crt->startBase = udcReadBits32(udc, isSwapped);
crt->endChromIx = udcReadBits32(udc, isSwapped);
crt->endBase = udcReadBits32(udc, isSwapped);
crt->fileSize = udcReadBits64(udc, isSwapped);
crt->itemsPerSlot = udcReadBits32(udc, isSwapped);

/* Skip over reserved bits of header. */
bits32 reserved32;
udcMustReadOne(udc, reserved32);

/* Save position of root block of r tree. */
crt->rootOffset = udcTell(udc);

return crt;
}
예제 #5
0
파일: crTree.c 프로젝트: blumroy/kentUtils
struct crTreeFile *crTreeFileOpen(char *fileName)
/* Open up r-tree index file - reading headers and verifying things. */
{
/* Open file and allocate structure to hold info from header etc. */
struct udcFile *udc = udcFileOpen(fileName, udcDefaultDir());
struct crTreeFile *crt = needMem(sizeof(*crt));
fileName = crt->fileName = cloneString(fileName);
crt->udc = udc;

/* Read magic number at head of file and use it to see if we are proper file type, and
 * see if we are byte-swapped. */
bits32 magic;
boolean isSwapped = FALSE;
udcMustReadOne(udc, magic);
if (magic != crTreeSig)
    {
    magic = byteSwap32(magic);
    isSwapped = crt->isSwapped = TRUE;
    if (magic != crTreeSig)
       errAbort("%s is not a chromosome r-tree index file", fileName);
    }

/* Read rest of high level header including notably the offsets to the
 * chromosome and range indexes. */
bits32 reserved32;
udcMustReadOne(udc, reserved32);
crt->chromOffset = udcReadBits64(udc, isSwapped);
crt->cirOffset = udcReadBits64(udc, isSwapped);

/* Read in the chromosome index header. */
udcSeek(udc, crt->chromOffset);
crt->chromBpt = bptFileAttach(fileName, udc);

/* Read in range index header. */
udcSeek(udc, crt->cirOffset);
crt->cir = cirTreeFileAttach(fileName, udc);

return crt;
}
예제 #6
0
static void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context, 
    void (*callback)(void *context, void *key, int keySize, void *val, int valSize) )
/* Recursively go across tree, calling callback at leaves. */
{
/* Seek to start of block. */
udcSeek(bpt->udc, blockStart);

/* Read block header. */
UBYTE isLeaf;
UBYTE reserved;
bits16 i, childCount;
udcMustReadOne(bpt->udc, isLeaf);
udcMustReadOne(bpt->udc, reserved);
boolean isSwapped = bpt->isSwapped;
childCount = udcReadBits16(bpt->udc, isSwapped);

char keyBuf[bpt->keySize], valBuf[bpt->valSize];
if (isLeaf)
    {
    for (i=0; i<childCount; ++i)
        {
	udcMustRead(bpt->udc, keyBuf, bpt->keySize);
	udcMustRead(bpt->udc, valBuf, bpt->valSize);
	callback(context, keyBuf, bpt->keySize, valBuf, bpt->valSize);
	}
    }
else
    {
    bits64 fileOffsets[childCount];
    /* Loop through to get file offsets of children. */
    for (i=0; i<childCount; ++i)
	{
	udcMustRead(bpt->udc, keyBuf, bpt->keySize);
	fileOffsets[i] = udcReadBits64(bpt->udc, isSwapped);
	}
    /* Loop through recursing on child offsets. */
    for (i=0; i<childCount; ++i)
	rTraverse(bpt, fileOffsets[i], context, callback);
    }
}
예제 #7
0
struct bptFile *bptFileAttach(char *fileName, struct udcFile *udc)
/* Open up index file on previously open file, with header at current file position. */
{
/* Open file and allocate structure to hold info from header etc. */
struct bptFile *bpt = needMem(sizeof(*bpt));
bpt->fileName = fileName;
bpt->udc = udc;

/* Read magic number at head of file and use it to see if we are proper file type, and
 * see if we are byte-swapped. */
bits32 magic;
boolean isSwapped = FALSE;
udcMustReadOne(udc, magic);
if (magic != bptSig)
    {
    magic = byteSwap32(magic);
    isSwapped = bpt->isSwapped = TRUE;
    if (magic != bptSig)
       errAbort("%s is not a bpt b-plus tree index file", fileName);
    }

/* Read rest of defined bits of header, byte swapping as needed. */
bpt->blockSize = udcReadBits32(udc, isSwapped);
bpt->keySize = udcReadBits32(udc, isSwapped);
bpt->valSize = udcReadBits32(udc, isSwapped);
bpt->itemCount = udcReadBits64(udc, isSwapped);

/* Skip over reserved bits of header. */
bits32 reserved32;
udcMustReadOne(udc, reserved32);
udcMustReadOne(udc, reserved32);

/* Save position of root block of b+ tree. */
bpt->rootOffset = udcTell(udc);

return bpt;
}
예제 #8
0
static void rFindMulti(struct bptFile *bpt, bits64 blockStart, void *key, struct slRef **pList)
/* Find values corresponding to key and add them to pList.  You'll need to 
 * Do a slRefFreeListAndVals() on the list when done. */
{
/* Seek to start of block. */
udcSeek(bpt->udc, blockStart);

/* Read block header. */
UBYTE isLeaf;
UBYTE reserved;
bits16 i, childCount;
udcMustReadOne(bpt->udc, isLeaf);
udcMustReadOne(bpt->udc, reserved);
boolean isSwapped = bpt->isSwapped;
childCount = udcReadBits16(bpt->udc, isSwapped);

int keySize = bpt->keySize;
UBYTE keyBuf[keySize];   /* Place to put a key, buffered on stack. */
UBYTE valBuf[bpt->valSize];   /* Place to put a value, buffered on stack. */

if (isLeaf)
    {
    for (i=0; i<childCount; ++i)
        {
	udcMustRead(bpt->udc, keyBuf, keySize);
	udcMustRead(bpt->udc, valBuf, bpt->valSize);
	if (memcmp(key, keyBuf, keySize) == 0)
	    {
	    void *val = cloneMem(valBuf, bpt->valSize);
	    refAdd(pList, val);
	    }
	}
    }
else
    {
    /* Read first key and first file offset. */
    udcMustRead(bpt->udc, keyBuf, keySize);
    bits64 lastFileOffset = udcReadBits64(bpt->udc, isSwapped);
    bits64 fileOffset = lastFileOffset;
    int lastCmp = memcmp(key, keyBuf, keySize);

    /* Loop through remainder. */
    for (i=1; i<childCount; ++i)
	{
	udcMustRead(bpt->udc, keyBuf, keySize);
	fileOffset = udcReadBits64(bpt->udc, isSwapped);
	int cmp = memcmp(key, keyBuf, keySize);
	if (lastCmp >= 0 && cmp <= 0)
	    {
	    bits64 curPos = udcTell(bpt->udc);
	    rFindMulti(bpt, lastFileOffset, key, pList);
	    udcSeek(bpt->udc, curPos);
	    }
	if (cmp < 0)
	    return;
	lastCmp = cmp;
	lastFileOffset = fileOffset;
	}
    /* If made it all the way to end, do last one too. */
    rFindMulti(bpt, fileOffset, key, pList);
    }
}
예제 #9
0
struct bbiFile *bbiFileOpenWithDir(char *fileName, bits32 sig, char *typeName, char *udcDir)
/* same (mostly) as bbiFileOpen in bbiFile.c, but allows setting the temporary dir */
{
    struct bbiFile *bbi;
    AllocVar(bbi);
    bbi->fileName = cloneString(fileName);
    struct udcFile *udc = bbi->udc = udcFileOpen(fileName, udcDir);
    /* Read magic number at head of file and use it to see if we are proper file type, and
     * see if we are byte-swapped. */
    bits32 magic;
    boolean isSwapped = FALSE;
    udcMustRead(udc, &magic, sizeof(magic));
    if (magic != sig)
    {
	magic = byteSwap32(magic);
	isSwapped = TRUE;
	if (magic != sig)
	    errAbort("%s is not a %s file", fileName, typeName);
    }
    bbi->typeSig = sig;
    bbi->isSwapped = isSwapped;

/* Read rest of defined bits of header, byte swapping as needed. */
    bbi->version = udcReadBits16(udc, isSwapped);
    bbi->zoomLevels = udcReadBits16(udc, isSwapped);
    bbi->chromTreeOffset = udcReadBits64(udc, isSwapped);
    bbi->unzoomedDataOffset = udcReadBits64(udc, isSwapped);
    bbi->unzoomedIndexOffset = udcReadBits64(udc, isSwapped);
    bbi->fieldCount = udcReadBits16(udc, isSwapped);
    bbi->definedFieldCount = udcReadBits16(udc, isSwapped);
    bbi->asOffset = udcReadBits64(udc, isSwapped);
    bbi->totalSummaryOffset = udcReadBits64(udc, isSwapped);
    bbi->uncompressBufSize = udcReadBits32(udc, isSwapped);
    bbi->extensionOffset = udcReadBits64(udc, isSwapped);

/* Read zoom headers. */
    int i;
    struct bbiZoomLevel *level, *levelList = NULL;
    for (i=0; i<bbi->zoomLevels; ++i)
    {
	AllocVar(level);
	level->reductionLevel = udcReadBits32(udc, isSwapped);
	level->reserved = udcReadBits32(udc, isSwapped);
	level->dataOffset = udcReadBits64(udc, isSwapped);
	level->indexOffset = udcReadBits64(udc, isSwapped);
	slAddHead(&levelList, level);
    }
    slReverse(&levelList);
    bbi->levelList = levelList;

/* Deal with header extension if any. */
    if (bbi->extensionOffset != 0)
    {
	udcSeek(udc, bbi->extensionOffset);
	bbi->extensionSize = udcReadBits16(udc, isSwapped);
	bbi->extraIndexCount = udcReadBits16(udc, isSwapped);
	bbi->extraIndexListOffset = udcReadBits64(udc, isSwapped);
    }

/* Attach B+ tree of chromosome names and ids. */
    udcSeek(udc, bbi->chromTreeOffset);
    bbi->chromBpt =  bptFileAttach(fileName, udc);

    return bbi;
}
예제 #10
0
static void rFindOverlappingBlocks(struct cirTreeFile *crt, int level, bits64 indexFileOffset,
	bits32 chromIx, bits32 start, bits32 end, struct fileOffsetSize **retList)
/* Recursively find blocks with data. */
{
struct udcFile *udc = crt->udc;

/* Seek to start of block. */
udcSeek(udc, indexFileOffset);

/* Read block header. */
UBYTE isLeaf;
UBYTE reserved;
bits16 i, childCount;
udcMustReadOne(udc, isLeaf);
udcMustReadOne(udc, reserved);
boolean isSwapped = crt->isSwapped;
childCount = udcReadBits16(udc, isSwapped);

verbose(3, "rFindOverlappingBlocks %llu %u:%u-%u.  childCount %d. isLeaf %d\n", indexFileOffset, chromIx, start, end, (int)childCount, (int)isLeaf);

if (isLeaf)
    {
    /* Loop through node adding overlapping leaves to block list. */
    for (i=0; i<childCount; ++i)
        {
	bits32 startChromIx = udcReadBits32(udc, isSwapped);
	bits32 startBase = udcReadBits32(udc, isSwapped);
	bits32 endChromIx = udcReadBits32(udc, isSwapped);
	bits32 endBase = udcReadBits32(udc, isSwapped);
	bits64 offset = udcReadBits64(udc, isSwapped);
	bits64 size = udcReadBits64(udc, isSwapped);
	if (cirTreeOverlaps(chromIx, start, end, startChromIx, startBase, endChromIx, endBase))
	    {
	    struct fileOffsetSize *block;
	    AllocVar(block);
	    block->offset = offset;
	    block->size = size;
	    slAddHead(retList, block);
	    }
	}
    }
else
    {
    /* Read node into arrays. */
    bits32 startChromIx[childCount], startBase[childCount];
    bits32 endChromIx[childCount], endBase[childCount];
    bits64 offset[childCount];
    for (i=0; i<childCount; ++i)
        {
	startChromIx[i] = udcReadBits32(udc, isSwapped);
	startBase[i] = udcReadBits32(udc, isSwapped);
	endChromIx[i] = udcReadBits32(udc, isSwapped);
	endBase[i] = udcReadBits32(udc, isSwapped);
	offset[i] = udcReadBits64(udc, isSwapped);
	}

    /* Recurse into child nodes that we overlap. */
    for (i=0; i<childCount; ++i)
	{
	if (cirTreeOverlaps(chromIx, start, end, startChromIx[i], startBase[i], 
		endChromIx[i], endBase[i]))
	    {
	    rFindOverlappingBlocks(crt, level+1, offset[i], chromIx, start, end, retList);
	    }
	}
    }
}
예제 #11
0
파일: bigBed.c 프로젝트: cestmoi7/AGAPE
bits64 bigBedItemCount(struct bbiFile *bbi)
/* Return total items in file. */
{
udcSeek(bbi->udc, bbi->unzoomedDataOffset);
return udcReadBits64(bbi->udc, bbi->isSwapped);
}
struct bbiSummaryElement bbiTotalSummary(struct bbiFile *bbi)
/* Return summary of entire file! */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct bbiSummaryElement res;
ZeroVar(&res);

if (bbi->totalSummaryOffset != 0)
    {
    udcSeek(udc, bbi->totalSummaryOffset);
    res.validCount = udcReadBits64(udc, isSwapped);
    res.minVal = udcReadDouble(udc, isSwapped);
    res.maxVal = udcReadDouble(udc, isSwapped);
    res.sumData = udcReadDouble(udc, isSwapped);
    res.sumSquares = udcReadDouble(udc, isSwapped);
    }
else if (bbi->version == 1)
    /* Require version 1 so as not to have to deal with compression.  Should not happen
     * to have NULL totalSummaryOffset for non-empty version 2+ file anyway. */
    {
    /* Find most extreme zoom. */
    struct bbiZoomLevel *bestZoom = NULL, *zoom;
    bits32 bestReduction = 0;
    for (zoom = bbi->levelList; zoom != NULL; zoom = zoom->next)
	{
	if (zoom->reductionLevel > bestReduction)
	    {
	    bestReduction = zoom->reductionLevel;
	    bestZoom = zoom;
	    }
	}

    if (bestZoom != NULL)
	{
	udcSeek(udc, bestZoom->dataOffset);
	bits32 zoomSectionCount = udcReadBits32(udc, isSwapped);
	bits32 i;
	for (i=0; i<zoomSectionCount; ++i)
	    {
	    /* Read, but ignore, position. */
	    bits32 chromId, chromStart, chromEnd;
	    chromId = udcReadBits32(udc, isSwapped);
	    chromStart = udcReadBits32(udc, isSwapped);
	    chromEnd = udcReadBits32(udc, isSwapped);

	    /* First time through set values, rest of time add to them. */
	    if (i == 0)
		{
		res.validCount = udcReadBits32(udc, isSwapped);
		res.minVal = udcReadFloat(udc, isSwapped);
		res.maxVal = udcReadFloat(udc, isSwapped);
		res.sumData = udcReadFloat(udc, isSwapped);
		res.sumSquares = udcReadFloat(udc, isSwapped);
		}
	    else
		{
		res.validCount += udcReadBits32(udc, isSwapped);
		float minVal = udcReadFloat(udc, isSwapped);
		if (minVal < res.minVal) res.minVal = minVal;
		float maxVal = udcReadFloat(udc, isSwapped);
		if (maxVal > res.maxVal) res.maxVal = maxVal;
		res.sumData += udcReadFloat(udc, isSwapped);
		res.sumSquares += udcReadFloat(udc, isSwapped);
		}
	    }
	}
    }
return res;
}
struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName)
/* Open up big wig or big bed file. */
{
/* This code needs to agree with code in two other places currently - bigBedFileCreate,
 * and bigWigFileCreate.  I'm thinking of refactoring to share at least between
 * bigBedFileCreate and bigWigFileCreate.  It'd be great so it could be structured
 * so that it could send the input in one chromosome at a time, and send in the zoom
 * stuff only after all the chromosomes are done.  This'd potentially reduce the memory
 * footprint by a factor of 2 or 4.  Still, for now it works. -JK */
struct bbiFile *bbi;
AllocVar(bbi);
bbi->fileName = cloneString(fileName);
struct udcFile *udc = bbi->udc = udcFileOpen(fileName, udcDefaultDir());

/* Read magic number at head of file and use it to see if we are proper file type, and
 * see if we are byte-swapped. */
bits32 magic;
boolean isSwapped = FALSE;
udcMustRead(udc, &magic, sizeof(magic));
if (magic != sig)
    {
    magic = byteSwap32(magic);
    isSwapped = TRUE;
    if (magic != sig)
       errAbort("%s is not a %s file", fileName, typeName);
    }
bbi->typeSig = sig;
bbi->isSwapped = isSwapped;

/* Read rest of defined bits of header, byte swapping as needed. */
bbi->version = udcReadBits16(udc, isSwapped);
bbi->zoomLevels = udcReadBits16(udc, isSwapped);
bbi->chromTreeOffset = udcReadBits64(udc, isSwapped);
bbi->unzoomedDataOffset = udcReadBits64(udc, isSwapped);
bbi->unzoomedIndexOffset = udcReadBits64(udc, isSwapped);
bbi->fieldCount = udcReadBits16(udc, isSwapped);
bbi->definedFieldCount = udcReadBits16(udc, isSwapped);
bbi->asOffset = udcReadBits64(udc, isSwapped);
bbi->totalSummaryOffset = udcReadBits64(udc, isSwapped);
bbi->uncompressBufSize = udcReadBits32(udc, isSwapped);

/* Skip over reserved area. */
udcSeek(udc, 64);

/* Read zoom headers. */
int i;
struct bbiZoomLevel *level, *levelList = NULL;
for (i=0; i<bbi->zoomLevels; ++i)
    {
    AllocVar(level);
    level->reductionLevel = udcReadBits32(udc, isSwapped);
    level->reserved = udcReadBits32(udc, isSwapped);
    level->dataOffset = udcReadBits64(udc, isSwapped);
    level->indexOffset = udcReadBits64(udc, isSwapped);
    slAddHead(&levelList, level);
    }
slReverse(&levelList);
bbi->levelList = levelList;

/* Attach B+ tree of chromosome names and ids. */
udcSeek(udc, bbi->chromTreeOffset);
bbi->chromBpt =  bptFileAttach(fileName, udc);

return bbi;
}