Example #1
void output(int depth, struct rqlStatement *rql, struct tagStorm *tags, struct tagStanza *stanza)
/* Output stanza according to clOut */
{
char *format = clOut;
if (sameString(format, "ra"))
    {
    if (stanza->children == NULL)
	{
	struct slName *field;
	for (field = rql->fieldList; field != NULL; field = field->next)
	    {
	    char *val = tagFindVal(stanza, field->name);
	    if (val != NULL)
		printf("%s\t%s\n", field->name, val);
	    }
	printf("\n");
	}
    }
else if (sameString(format, "tab"))
    {
    if (stanza->children == NULL)
	{
	struct slName *field;
	char *connector = "";
	for (field = rql->fieldList; field != NULL; field = field->next)
	    {
	    char *val = emptyForNull(tagFindVal(stanza, field->name));
	    printf("%s%s", connector, val);
	    connector = "\t";
	    }
	printf("\n");
	}
    }
else if (sameString(format, "tags"))
    {
    struct slName *field;
    for (field = rql->fieldList; field != NULL; field = field->next)
	{
	char *val = tagFindLocalVal(stanza, field->name);
	if (val != NULL)
	    {
	    repeatCharOut(stdout, '\t', depth);
	    printf("%s\t%s\n", field->name, val);
	    }
	}
    printf("\n");
    }
else
    errAbort("Unrecognized format %s", format);
}
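
Every example on this page calls repeatCharOut. A minimal sketch of what it appears to do, judging from these call sites (write one character to a stream a given number of times); this is an illustration, not the library's implementation:

#include <stdio.h>

static void repeatCharOutSketch(FILE *f, char c, int count)
/* Write character c to f the given number of times (illustrative equivalent). */
{
int i;
for (i = 0; i < count; ++i)
    fputc(c, f);
}
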
Example #2
static void rTsWrite(struct tagStanza *list, FILE *f, int maxDepth, int depth)
/* Recursively write out list to file */
{
if (depth >= maxDepth)
    return;
struct tagStanza *stanza;
for (stanza = list; stanza != NULL; stanza = stanza->next)
    {
    struct slPair *pair;
    for (pair = stanza->tagList; pair != NULL; pair = pair->next)
        {
	repeatCharOut(f, '\t', depth);
	fprintf(f, "%s %s\n", pair->name, (char*)(pair->val));
	}
    fputc('\n', f);
    rTsWrite(stanza->children, f, maxDepth, depth+1);
    }
}
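
A hypothetical top-level wrapper for the recursion above, sketched under the assumption that the tag storm keeps its top-level stanzas in a forest field; mustOpen and carefulClose are used the same way as in Example #9:

static void tagStormWriteSketch(struct tagStorm *tagStorm, char *fileName, int maxDepth)
/* Open fileName and write the whole forest, starting the recursion at depth 0. */
{
FILE *f = mustOpen(fileName, "w");
rTsWrite(tagStorm->forest, f, maxDepth, 0);   /* forest field name is an assumption */
carefulClose(&f);
}
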
Example #3
static void writeLeafLevel(bits16 blockSize, void *itemArray, int itemSize, int itemCount, 
	void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize,
	void* (*fetchVal)(const void *va), bits32 valSize,
	FILE *f)
/* Write out leaf level blocks. */
{
char *items = itemArray;
int i,j;
UBYTE isLeaf = TRUE;
UBYTE reserved = 0;
bits16 countOne;
int countLeft = itemCount;
char keyBuf[keySize+1];
keyBuf[keySize] = 0;
for (i=0; i<itemCount; i += countOne)
    {
    /* Write block header */
    if (countLeft > blockSize)
        countOne = blockSize;
    else
        countOne = countLeft;
    writeOne(f, isLeaf);
    writeOne(f, reserved);
    writeOne(f, countOne);

    /* Write out position in genome and in file for each item. */
    for (j=0; j<countOne; ++j)
        {
	assert(i+j < itemCount);
	void *item = items + (i+j)*itemSize;
	memset(keyBuf, 0, keySize);
	(*fetchKey)(item, keyBuf);
	mustWrite(f, keyBuf, keySize);
	mustWrite(f, (*fetchVal)(item), valSize);
	}
    
    /* Pad out any unused bits of last block with zeroes. */
    int slotSize = keySize + valSize;
    for (j=countOne; j<blockSize; ++j)
	repeatCharOut(f, 0, slotSize);

    countLeft -= countOne;
    }
}
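
For clarity, here is what the fetchKey/fetchVal callbacks might look like for a hypothetical fixed-size record; the struct and both functions are assumptions for illustration, not library code (bits64 is the same integer type used above):

#include <string.h>

struct nameOffset
    {
    char name[32];    /* key; the caller zeroes keyBuf to keySize before fetchKey */
    bits64 offset;    /* fixed-size value written right after the key */
    };

static void nameOffsetFetchKey(const void *va, char *keyBuf)
/* Copy the item's NUL-terminated key into keyBuf. */
{
const struct nameOffset *no = va;
strcpy(keyBuf, no->name);
}

static void *nameOffsetFetchVal(const void *va)
/* Return a pointer to the item's fixed-size value. */
{
struct nameOffset *no = (struct nameOffset *)va;
return &no->offset;
}

With keySize = 32 and valSize = sizeof(bits64), an array of struct nameOffset could be handed to writeLeafLevel directly as itemArray.
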
Example #4
static bits64 rWriteIndexLevel(bits16 blockSize, int childNodeSize,
	struct rTree *tree, int curLevel, int destLevel,
	bits64 offsetOfFirstChild, FILE *f)
/* Recursively write an index level, skipping levels below destLevel,
 * writing out destLevel. */
{
// uglyf("rWriteIndexLevel blockSize=%d, childNodeSize=%d, offsetOfFirstChild=%llu, curLevel=%d, destLevel=%d slCount(tree)=%d\n", blockSize, childNodeSize, offsetOfFirstChild, curLevel, destLevel, slCount(tree->children));
struct rTree *el;
bits64 offset = offsetOfFirstChild;
if (curLevel == destLevel)
    {
    /* We've reached the right level, write out a node header */
    UBYTE reserved = 0;
    UBYTE isLeaf = FALSE;
    bits16 countOne = slCount(tree->children);
    writeOne(f, isLeaf);
    writeOne(f, reserved);
    writeOne(f, countOne);

    /* Write out elements of this node. */
    for (el = tree->children; el != NULL; el = el->next)
	{
	writeOne(f, el->startChromIx);
	writeOne(f, el->startBase);
	writeOne(f, el->endChromIx);
	writeOne(f, el->endBase);
	writeOne(f, offset);
	offset += childNodeSize;
	}

    /* Write out zeroes for empty slots in node. */
    int i;
    for (i=countOne; i<blockSize; ++i)
	repeatCharOut(f, 0, indexSlotSize);
    }
else 
    {
    /* Otherwise recurse on children. */
    for (el = tree->children; el != NULL; el = el->next)
	offset = rWriteIndexLevel(blockSize, childNodeSize, el, curLevel+1, destLevel,
		offset, f);
    }
return offset;
}
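
The indexSlotSize constant used for padding is not shown in this excerpt; judging from the five fields written per slot above, and assuming the bounds are bits32 as in the bigBed on-disk format, it presumably covers four bits32 bounds plus one bits64 child offset:

/* Assumed definition, derived from the writeOne calls above (4*4 + 8 = 24 bytes). */
#define INDEX_SLOT_SIZE_SKETCH (4 * sizeof(bits32) + sizeof(bits64))
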
Example #5
static void rWriteLeaves(int itemsPerSlot, int lNodeSize, struct rTree *tree, int curLevel,
	int leafLevel, FILE *f)
/* Write out leaf-level nodes. */
{
if (curLevel == leafLevel)
    {
    /* We've reached the right level, write out a node header. */
    UBYTE reserved = 0;
    UBYTE isLeaf = TRUE;
    bits16 countOne = slCount(tree->children);
    writeOne(f, isLeaf);
    writeOne(f, reserved);
    writeOne(f, countOne);

    /* Write out elements of this node. */
    struct rTree *el;
    for (el = tree->children; el != NULL; el = el->next)
	{
	writeOne(f, el->startChromIx);
	writeOne(f, el->startBase);
	writeOne(f, el->endChromIx);
	writeOne(f, el->endBase);
	writeOne(f, el->startFileOffset);
	bits64 size = el->endFileOffset - el->startFileOffset;
	writeOne(f, size);
	}

    /* Write out zeroes for empty slots in node. */
    int i;
    for (i=countOne; i<itemsPerSlot; ++i)
	repeatCharOut(f, 0, indexSlotSize);
    }
else
    {
    /* Otherwise recurse on children. */
    struct rTree *el;
    for (el = tree->children; el != NULL; el = el->next)
	rWriteLeaves(itemsPerSlot, lNodeSize, el, curLevel+1, leafLevel, f);
    }
}
Example #6
static bits32 writeIndexLevel(bits16 blockSize, 
	void *itemArray, int itemSize, long itemCount, 
	bits32 indexOffset, int level, 
	void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize, bits32 valSize,
	FILE *f)
/* Write out a non-leaf level. */
{
char *items = itemArray;

/* Calculate number of nodes to write at this level. */
long slotSizePer = xToY(blockSize, level);   // Number of items per slot in node
long nodeSizePer = slotSizePer * blockSize;  // Number of items per node
long nodeCount = (itemCount + nodeSizePer - 1)/nodeSizePer;	


/* Calculate sizes and offsets. */
long bytesInIndexBlock = (bptBlockHeaderSize + blockSize * (keySize+sizeof(bits64)));
long bytesInLeafBlock = (bptBlockHeaderSize + blockSize * (keySize+valSize));
bits64 bytesInNextLevelBlock = (level == 1 ? bytesInLeafBlock : bytesInIndexBlock);
bits64 levelSize = nodeCount * bytesInIndexBlock;
bits64 endLevel = indexOffset + levelSize;
bits64 nextChild = endLevel;


UBYTE isLeaf = FALSE;
UBYTE reserved = 0;

long i,j;
char keyBuf[keySize+1];
keyBuf[keySize] = 0;
for (i=0; i<itemCount; i += nodeSizePer)
    {
    /* Calculate size of this block */
    long countOne = (itemCount - i + slotSizePer - 1)/slotSizePer;
    if (countOne > blockSize)
        countOne = blockSize;
    bits16 shortCountOne = countOne;

    /* Write block header. */
    writeOne(f, isLeaf);
    writeOne(f, reserved);
    writeOne(f, shortCountOne);

    /* Write out the slots that are used one by one, and do sanity check. */
    int slotsUsed = 0;
    long endIx = i + nodeSizePer;
    if (endIx > itemCount)
        endIx = itemCount;
    for (j=i; j<endIx; j += slotSizePer)
        {
	void *item = items + j*itemSize;
	memset(keyBuf, 0, keySize);
	(*fetchKey)(item, keyBuf);
	mustWrite(f, keyBuf, keySize);
	writeOne(f, nextChild);
	nextChild += bytesInNextLevelBlock;
	++slotsUsed;
	}
    assert(slotsUsed == shortCountOne);

    /* Write out empty slots as all zero. */
    int slotSize = keySize + sizeof(bits64);
    for (j=countOne; j<blockSize; ++j)
	repeatCharOut(f, 0, slotSize);
    }
return endLevel;
}
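
xToY is not shown here; from its use above (items per slot at a given level) it is presumably an integer power helper. A minimal equivalent, written as an assumption:

static long long xToYSketch(int x, unsigned y)
/* Return x raised to the y power (illustrative stand-in for xToY). */
{
long long result = 1;
while (y-- > 0)
    result *= x;
return result;
}
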
Example #7
void bbiWriteDummyZooms(FILE *f)
/* Write out zeroes to reserve space for ten zoom levels. */
{
repeatCharOut(f, 0, bbiMaxZoomLevels * 24);
}
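
The 24 bytes per level reserved here match the per-zoom record that bbFileCreate writes back later in Example #9: a bits32 reduction amount, a bits32 reserved word, and two bits64 offsets.

/* Size sketch of one reserved zoom-header record (4 + 4 + 8 + 8 = 24 bytes). */
#define ZOOM_RECORD_SIZE_SKETCH (2 * sizeof(bits32) + 2 * sizeof(bits64))
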
Example #8
void bbiWriteDummyHeader(FILE *f)
/* Write out all-zero header, just to reserve space for it. */
{
repeatCharOut(f, 0, 64);
}
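
The 64 bytes reserved here line up with the fixed header written back at the end of Example #9, which asserts ftell(f) == 64 after its twelve writeOne calls.

/* Size sketch of the fixed header: sig(4) + version(2) + summaryCount(2)
 * + chromTreeOffset(8) + dataOffset(8) + indexOffset(8) + fieldCount(2)
 * + definedFieldCount(2) + asOffset(8) + totalSummaryOffset(8)
 * + uncompressBufSize(4) + extHeaderOffset(8) = 64 bytes. */
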
Example #9
void bbFileCreate(
	char *inName, 	  /* Input file in a tabular bed format <chrom><start><end> + whatever. */
	char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */
	int blockSize,	  /* Number of items to bundle in r-tree.  1024 is good. */
	int itemsPerSlot, /* Number of items in lowest level of tree.  64 is good. */
	char *asText,	  /* Field definitions in a string */
	struct asObject *as,  /* Field definitions parsed out */
	boolean doCompress, /* If TRUE then compress data. */
	struct slName *extraIndexList,	/* List of extra indexes to add */
	char *outName)    /* BigBed output file name. */
/* Convert tab-separated bed file to binary indexed, zoomed bigBed version. */
{
/* Set up timing measures. */
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);

bits16 fieldCount = slCount(as->columnList);
bits16 extraIndexCount = slCount(extraIndexList);

struct bbExIndexMaker *eim = NULL;
if (extraIndexList != NULL)
    eim = bbExIndexMakerNew(extraIndexList, as);

/* Load in chromosome sizes. */
struct hash *chromSizesHash = NULL;

if (sizesIs2Bit)
    chromSizesHash = twoBitChromHash(chromSizes);
else
    chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "Read %d chromosomes and sizes from %s\n",  chromSizesHash->elCount, chromSizes);

/* Do first pass, mostly just scanning file and counting hits per chromosome. */
int minDiff = 0;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, eim, 
    &minDiff, &aveSize, &bedCount, tabSep);
verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList));
verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize);

/* Open output file and write dummy header. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);

/* Write out autoSql string */
bits64 asOffset = ftell(f);
mustWrite(f, asText, strlen(asText) + 1);
verbose(2, "as definition has %d columns\n", fieldCount);

/* Write out dummy total summary. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write out dummy header extension */
bits64 extHeaderOffset = ftell(f);
bits16 extHeaderSize = 64;
repeatCharOut(f, 0, extHeaderSize);

/* Write out extra index stuff if need be. */
bits64 extraIndexListOffset = 0;
bits64 extraIndexListEndOffset = 0;
if (extraIndexList != NULL)
    {
    extraIndexListOffset = ftell(f);
    int extraIndexSize = 16 + 4*1;   // Fixed record size 16, plus 1 times field size of 4 
    repeatCharOut(f, 0, extraIndexSize*extraIndexCount);
    extraIndexListEndOffset = ftell(f);
    }

/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);

/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Write out primary full resolution data in sections, collect stats to use for reductions. */
bits64 dataOffset = ftell(f);
bits32 blockCount = 0;
bits32 maxBlockSize = 0;
struct bbiBoundsArray *boundsArray = NULL;
writeOne(f, bedCount);
if (bedCount > 0)
    {
    blockCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
    AllocArray(boundsArray, blockCount);
    lineFileRewind(lf);
    if (eim)
	bbExIndexMakerAllocChunkArrays(eim, bedCount);
    writeBlocks(usageList, lf, as, itemsPerSlot, boundsArray, blockCount, doCompress,
	    f, resTryCount, resScales, resSizes, eim, bedCount, fieldCount, &maxBlockSize);
    }
verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)", 
	(long long)bedCount, fieldCount);

/* Write out primary data index. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
    blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, 
    indexOffset, f);
freez(&boundsArray);
verboseTime(2, "index write");

/* Declare arrays and vars that track the zoom levels we actually output. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];

/* Call monster zoom maker library function that bedGraphToBigWig also uses. */
int zoomLevels = 0;
if (bedCount > 0)
    {
    zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
	bedWriteReducedOnceReturnReducedTwice, fieldCount,
	doCompress, indexOffset - dataOffset, 
	usageList, resTryCount, resScales, resSizes, 
	zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
    }

/* Write out extra indexes if need be. */
if (eim)
    {
    int i;
    for (i=0; i < eim->indexCount; ++i)
        {
	eim->fileOffsets[i] = ftell(f);
	maxBedNameSize = eim->maxFieldSize[i];
	qsort(eim->chunkArrayArray[i], bedCount, 
	    sizeof(struct bbNamedFileChunk), bbNamedFileChunkCmpByName);
	assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0]));
	bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]), 
	    bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize, bbNamedFileChunkVal, 
	    sizeof(bits64) + sizeof(bits64), f);
	verboseTime(1, "Sorting and writing extra index %d", i);
	}
    }

/* Figure out buffer size needed for uncompression if need be. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxBlockSize, maxZoomUncompSize);
    }

/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigBedSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits32 reserved32 = 0;
bits64 reserved64 = 0;

bits16 definedFieldCount = bedN;

/* Write fixed header */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, fieldCount);
writeOne(f, definedFieldCount);
writeOne(f, asOffset);
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, extHeaderOffset);
assert(ftell(f) == 64);

/* Write summary headers with data. */
int i;
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i<zoomLevels; ++i)
    {
    verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]);
    writeOne(f, zoomAmounts[i]);
    writeOne(f, reserved32);
    writeOne(f, zoomDataOffsets[i]);
    writeOne(f, zoomIndexOffsets[i]);
    }
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i<bbiMaxZoomLevels; ++i)
    {
    writeOne(f, reserved32);
    writeOne(f, reserved32);
    writeOne(f, reserved64);
    writeOne(f, reserved64);
    }

/* Write total summary. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);

/* Write extended header */
fseek(f, extHeaderOffset, SEEK_SET);
writeOne(f, extHeaderSize);
writeOne(f, extraIndexCount);
writeOne(f, extraIndexListOffset);
repeatCharOut(f, 0, 52);    // reserved
assert(ftell(f) - extHeaderOffset == extHeaderSize);

/* Write extra index offsets if need be. */
if (extraIndexCount != 0)
    {
    fseek(f, extraIndexListOffset, SEEK_SET);
    int i;
    for (i=0; i<extraIndexCount; ++i)
        {
	// Write out fixed part of index info
	bits16 type = 0;    // bPlusTree type
	bits16 indexFieldCount = 1;
	writeOne(f, type);
	writeOne(f, indexFieldCount);
	writeOne(f, eim->fileOffsets[i]);
	repeatCharOut(f, 0, 4);  // reserved

	// Write out field list - easy this time because for now always only one field.
	bits16 fieldId = eim->indexFields[i];
	writeOne(f, fieldId);
	repeatCharOut(f, 0, 2); // reserved
	}
    assert(ftell(f) == extraIndexListEndOffset);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);


/* Clean up. */
lineFileClose(&lf);
carefulClose(&f);
freeHash(&chromSizesHash);
bbiChromUsageFreeList(&usageList);
asObjectFreeList(&as);
}
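
A hypothetical call site for bbFileCreate, with illustrative argument values; bedAsDef and asParseText are assumed helpers from the surrounding library for building and parsing the autoSql field definitions:

void makeBigBedSketch(void)
/* Convert input.bed into output.bb (all names and sizes here are illustrative). */
{
char *asText = bedAsDef(12, 12);             /* assumed: autoSql text for a 12-field BED */
struct asObject *as = asParseText(asText);   /* assumed: parse field definitions */
bbFileCreate("input.bed", "chrom.sizes",
	1024,      /* r-tree block size */
	512,       /* items per lowest-level slot */
	asText, as,
	TRUE,      /* compress data blocks */
	NULL,      /* no extra name indexes */
	"output.bb");
}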