void output(int depth, struct rqlStatement *rql, struct tagStorm *tags, struct tagStanza *stanza) /* Output stanza according to clOut */ { char *format = clOut; if (sameString(format, "ra")) { if (stanza->children == NULL) { struct slName *field; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = tagFindVal(stanza, field->name); if (val != NULL) printf("%s\t%s\n", field->name, val); } printf("\n"); } } else if (sameString(format, "tab")) { if (stanza->children == NULL) { struct slName *field; char *connector = ""; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = emptyForNull(tagFindVal(stanza, field->name)); printf("%s%s", connector, val); connector = "\t"; } printf("\n"); } } else if (sameString(format, "tags")) { struct slName *field; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = tagFindLocalVal(stanza, field->name); if (val != NULL) { repeatCharOut(stdout, '\t', depth); printf("%s\t%s\n", field->name, val); } } printf("\n"); } else errAbort("Unrecognized format %s", format); }
static void rTsWrite(struct tagStanza *list, FILE *f, int maxDepth, int depth)
/* Write every stanza on list to f, each followed by its children (recursively).
 * Each tag is written as "name value" on its own line, preceded by depth tab
 * characters; a blank line follows each stanza's tags.  Nothing is written once
 * depth reaches maxDepth. */
{
if (depth >= maxDepth)
    return;

struct tagStanza *cur = list;
while (cur != NULL)
    {
    struct slPair *tag;
    for (tag = cur->tagList; tag != NULL; tag = tag->next)
        {
        repeatCharOut(f, '\t', depth);
        fprintf(f, "%s %s\n", tag->name, (char*)(tag->val));
        }
    fputc('\n', f);

    /* Children come right after their parent, one level deeper. */
    rTsWrite(cur->children, f, maxDepth, depth+1);
    cur = cur->next;
    }
}
static void writeLeafLevel(bits16 blockSize, void *itemArray, int itemSize, int itemCount,
        void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize,
        void* (*fetchVal)(const void *va), bits32 valSize, FILE *f)
/* Write the leaf-level blocks to f.
 * itemArray holds itemCount items of itemSize bytes each.  Items are taken in order,
 * at most blockSize per block.  Each block is a header (isLeaf flag, reserved byte,
 * 16-bit count of items in the block) followed by, for every item, keySize bytes
 * filled in by fetchKey and valSize bytes read from the pointer fetchVal returns.
 * Slots not used in a block are written as zero bytes so that every block holds
 * blockSize slots. */
{
char *itemBytes = itemArray;
UBYTE isLeaf = TRUE;
UBYTE reserved = 0;
int slotSize = keySize + valSize;       /* bytes in one key/value slot */

/* Key scratch buffer, with one extra byte kept at zero past the key itself. */
char keyBuf[keySize+1];
keyBuf[keySize] = 0;

int blockStart = 0;             /* index of the first item of the current block */
int remaining = itemCount;      /* items not yet written */
while (blockStart < itemCount)
    {
    /* Block header. */
    bits16 countOne = (remaining > blockSize) ? blockSize : remaining;
    writeOne(f, isLeaf);
    writeOne(f, reserved);
    writeOne(f, countOne);

    /* Used slots: key bytes then value bytes for each item. */
    int slot;
    for (slot = 0; slot < countOne; ++slot)
        {
        assert(blockStart + slot < itemCount);
        void *item = itemBytes + (blockStart + slot)*itemSize;
        memset(keyBuf, 0, keySize);     /* zero first so short keys come out zero padded */
        (*fetchKey)(item, keyBuf);
        mustWrite(f, keyBuf, keySize);
        mustWrite(f, (*fetchVal)(item), valSize);
        }

    /* Unused slots of a partly filled block are all zero. */
    for (slot = countOne; slot < blockSize; ++slot)
        repeatCharOut(f, 0, slotSize);

    remaining -= countOne;
    blockStart += countOne;
    }
}
static bits64 rWriteIndexLevel(bits16 blockSize, int childNodeSize, struct rTree *tree,
        int curLevel, int destLevel, bits64 offsetOfFirstChild, FILE *f)
/* Recursively walk tree down to destLevel and write the nodes found there to f;
 * levels above destLevel are only traversed, not written.
 * Each node written is a header (isLeaf = FALSE, reserved byte, 16-bit child count),
 * then for each child its startChromIx, startBase, endChromIx, endBase and a file
 * offset, then zero bytes for the slots left unused out of blockSize.
 * The offset recorded for the first child is offsetOfFirstChild and it advances by
 * childNodeSize for every child written.  Returns the offset that follows the last
 * child written, so the caller can continue from there. */
{
struct rTree *kid;
bits64 nextOffset = offsetOfFirstChild;

if (curLevel != destLevel)
    {
    /* Not deep enough yet: descend, threading the running offset through. */
    for (kid = tree->children; kid != NULL; kid = kid->next)
        nextOffset = rWriteIndexLevel(blockSize, childNodeSize, kid, curLevel+1, destLevel,
                nextOffset, f);
    return nextOffset;
    }

/* At the destination level: node header first. */
UBYTE reserved = 0;
UBYTE isLeaf = FALSE;
bits16 countOne = slCount(tree->children);
writeOne(f, isLeaf);
writeOne(f, reserved);
writeOne(f, countOne);

/* One slot per child: its bounds and where the child node is in the file. */
for (kid = tree->children; kid != NULL; kid = kid->next)
    {
    writeOne(f, kid->startChromIx);
    writeOne(f, kid->startBase);
    writeOne(f, kid->endChromIx);
    writeOne(f, kid->endBase);
    writeOne(f, nextOffset);
    nextOffset += childNodeSize;
    }

/* Zero out whatever slots this node does not use. */
int pad;
for (pad = countOne; pad < blockSize; ++pad)
    repeatCharOut(f, 0, indexSlotSize);

return nextOffset;
}
static void rWriteLeaves(int itemsPerSlot, int lNodeSize, struct rTree *tree, int curLevel, int leafLevel, FILE *f) /* Write out leaf-level nodes. */ { if (curLevel == leafLevel) { /* We've reached the right level, write out a node header. */ UBYTE reserved = 0; UBYTE isLeaf = TRUE; bits16 countOne = slCount(tree->children); writeOne(f, isLeaf); writeOne(f, reserved); writeOne(f, countOne); /* Write out elements of this node. */ struct rTree *el; for (el = tree->children; el != NULL; el = el->next) { writeOne(f, el->startChromIx); writeOne(f, el->startBase); writeOne(f, el->endChromIx); writeOne(f, el->endBase); writeOne(f, el->startFileOffset); bits64 size = el->endFileOffset - el->startFileOffset; writeOne(f, size); } /* Write out zeroes for empty slots in node. */ int i; for (i=countOne; i<itemsPerSlot; ++i) repeatCharOut(f, 0, indexSlotSize); } else { /* Otherwise recurse on children. */ struct rTree *el; for (el = tree->children; el != NULL; el = el->next) rWriteLeaves(itemsPerSlot, lNodeSize, el, curLevel+1, leafLevel, f); } }
static bits32 writeIndexLevel(bits16 blockSize, void *itemArray, int itemSize, long itemCount,
        bits32 indexOffset, int level,
        void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize, bits32 valSize,
        FILE *f)
/* Write one non-leaf level of the index to f.
 * itemArray holds itemCount items of itemSize bytes each.  Every slot at this level
 * stands for xToY(blockSize, level) consecutive items (presumably blockSize raised to
 * the power level - confirm against xToY) and a node holds blockSize slots.
 * Each node is a header (isLeaf = FALSE, reserved byte, 16-bit slot count) followed
 * by, per used slot, the keySize-byte key of the slot's first item and the 64-bit
 * file offset of the child block; unused slots are written as zero bytes.
 * Child offsets start right after this level (indexOffset plus the size of this
 * level) and advance by the size of a leaf block when level is 1, otherwise by the
 * size of an index block.
 * Returns the offset at which this level ends.
 * NOTE(review): the offset is computed as bits64 but indexOffset and the return type
 * are bits32, so it is presumably truncated for indexes past 4 GB - confirm. */
{
char *itemBytes = itemArray;

/* How many items one slot and one node cover, and how many nodes that makes. */
long perSlot = xToY(blockSize, level);
long perNode = perSlot * blockSize;
long nodeCount = (itemCount + perNode - 1)/perNode;

/* Block sizes and the resulting offsets. */
long indexBlockBytes = (bptBlockHeaderSize + blockSize * (keySize+sizeof(bits64)));
long leafBlockBytes = (bptBlockHeaderSize + blockSize * (keySize+valSize));
bits64 childBlockBytes;
if (level == 1)
    childBlockBytes = leafBlockBytes;
else
    childBlockBytes = indexBlockBytes;
bits64 levelSize = nodeCount * indexBlockBytes;
bits64 endLevel = indexOffset + levelSize;
bits64 nextChild = endLevel;

UBYTE isLeaf = FALSE;
UBYTE reserved = 0;
int slotSize = keySize + sizeof(bits64);        /* bytes in one key/offset slot */

/* Key scratch buffer, with one extra byte kept at zero past the key itself. */
char keyBuf[keySize+1];
keyBuf[keySize] = 0;

long nodeStart;
for (nodeStart = 0; nodeStart < itemCount; nodeStart += perNode)
    {
    /* Number of slots in use in this node, capped at a full node. */
    long countOne = (itemCount - nodeStart + perSlot - 1)/perSlot;
    if (countOne > blockSize)
        countOne = blockSize;
    bits16 shortCountOne = countOne;

    /* Node header. */
    writeOne(f, isLeaf);
    writeOne(f, reserved);
    writeOne(f, shortCountOne);

    /* Used slots: key of the first item under the slot, then the child's offset. */
    long nodeEnd = nodeStart + perNode;
    if (nodeEnd > itemCount)
        nodeEnd = itemCount;
    int slotsUsed = 0;
    long itemIx;
    for (itemIx = nodeStart; itemIx < nodeEnd; itemIx += perSlot)
        {
        void *item = itemBytes + itemIx*itemSize;
        memset(keyBuf, 0, keySize);     /* zero first so short keys come out zero padded */
        (*fetchKey)(item, keyBuf);
        mustWrite(f, keyBuf, keySize);
        writeOne(f, nextChild);
        nextChild += childBlockBytes;
        ++slotsUsed;
        }
    assert(slotsUsed == shortCountOne);  /* header count must match what was written */

    /* Unused slots are all zero. */
    long pad;
    for (pad = countOne; pad < blockSize; ++pad)
        repeatCharOut(f, 0, slotSize);
    }
return endLevel;
}
void bbiWriteDummyZooms(FILE *f)
/* Reserve room for the zoom level headers by writing 24 zero bytes to f for each of
 * the bbiMaxZoomLevels levels.  (24 presumably matches the two bits32 plus two bits64
 * values that bbFileCreate later writes per zoom level.) */
{
int placeholderBytes = bbiMaxZoomLevels * 24;
repeatCharOut(f, 0, placeholderBytes);
}
void bbiWriteDummyHeader(FILE *f)
/* Reserve room for the file header by writing 64 zero bytes to f.  bbFileCreate
 * rewinds and fills these bytes in once the real values are known. */
{
int headerBytes = 64;
repeatCharOut(f, 0, headerBytes);
}
void bbFileCreate(
        char *inName,     /* Input file in a tabular bed format <chrom><start><end> + whatever. */
        char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */
        int blockSize,    /* Number of items to bundle in r-tree. 1024 is good. */
        int itemsPerSlot, /* Number of items in lowest level of tree. 64 is good. */
        char *asText,     /* Field definitions in a string */
        struct asObject *as,  /* Field definitions parsed out */
        boolean doCompress, /* If TRUE then compress data. */
        struct slName *extraIndexList,  /* List of extra indexes to add */
        char *outName)    /* BigBed output file name. */
/* Convert tab-separated bed file to binary indexed, zoomed bigBed version.
 *
 * The input is read twice: once by bbiChromUsageFromBedFile() to gather per-chromosome
 * usage and the record count, then again (after lineFileRewind) by writeBlocks().
 *
 * The output is written in this order: zeroed header, zeroed zoom headers, autoSql
 * text, zeroed total summary, zeroed extended header, zeroed extra-index list (only
 * if extraIndexList is given), chromosome info, record count and data blocks, data
 * index, zoom levels, extra indexes.  The zeroed regions are then overwritten with
 * real values via rewind()/fseek(), and the signature is appended at the end of file.
 *
 * Besides its parameters this function reads sizesIs2Bit, tabSep and bedN and assigns
 * maxBedNameSize, none of which are declared inside it.
 * NOTE(review): the last statement calls asObjectFreeList(&as) on the caller-supplied
 * object - presumably the caller must not use it afterwards; confirm. */
{
/* Set up timing measures. */
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);

bits16 fieldCount = slCount(as->columnList);
bits16 extraIndexCount = slCount(extraIndexList);

/* eim stays NULL when no extra indexes were requested; later code tests it. */
struct bbExIndexMaker *eim = NULL;
if (extraIndexList != NULL)
    eim = bbExIndexMakerNew(extraIndexList, as);

/* Load in chromosome sizes. */
struct hash *chromSizesHash = NULL;

if (sizesIs2Bit)
    chromSizesHash = twoBitChromHash(chromSizes);
else
    chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "Read %d chromosomes and sizes from %s\n", chromSizesHash->elCount, chromSizes);

/* Do first pass, mostly just scanning file and counting hits per chromosome. */
int minDiff = 0;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, eim,
    &minDiff, &aveSize, &bedCount, tabSep);
verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList));
verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize);

/* Open output file and write dummy header. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);

/* Write out autoSql string, including its terminating zero byte. */
bits64 asOffset = ftell(f);
mustWrite(f, asText, strlen(asText) + 1);
verbose(2, "as definition has %d columns\n", fieldCount);

/* Write out dummy total summary.  It is rewritten near the end, after
 * bbiWriteZoomLevels() has been handed &totalSum. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write out dummy header extension */
bits64 extHeaderOffset = ftell(f);
bits16 extHeaderSize = 64;
repeatCharOut(f, 0, extHeaderSize);

/* Write out extra index stuff if need be.  Only space is reserved here; the
 * records themselves are written near the end of this function. */
bits64 extraIndexListOffset = 0;
bits64 extraIndexListEndOffset = 0;
if (extraIndexList != NULL)
    {
    extraIndexListOffset = ftell(f);
    int extraIndexSize = 16 + 4*1;   /* Fixed record size 16, plus 1 times field size of 4 */
    repeatCharOut(f, 0, extraIndexSize*extraIndexCount);
    extraIndexListEndOffset = ftell(f);
    }

/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);

/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Write out primary full resolution data in sections, collect stats to use for reductions.
 * The record count is written first, at dataOffset, even when it is zero. */
bits64 dataOffset = ftell(f);
bits32 blockCount = 0;
bits32 maxBlockSize = 0;
struct bbiBoundsArray *boundsArray = NULL;
writeOne(f, bedCount);
if (bedCount > 0)
    {
    blockCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
    AllocArray(boundsArray, blockCount);
    lineFileRewind(lf);         /* second pass over the input starts here */
    if (eim)
        bbExIndexMakerAllocChunkArrays(eim, bedCount);
    writeBlocks(usageList, lf, as, itemsPerSlot, boundsArray, blockCount, doCompress,
           f, resTryCount, resScales, resSizes, eim, bedCount, fieldCount, &maxBlockSize);
    }
verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)",
        (long long)bedCount, fieldCount);

/* Write out primary data index.  With no records boundsArray is NULL and
 * blockCount is 0 at this point. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
    blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
    indexOffset, f);
freez(&boundsArray);
verboseTime(2, "index write");

/* Declare arrays and vars that track the zoom levels we actually output.
 * Only the first zoomLevels entries are read back below. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];

/* Call monster zoom maker library function that bedGraphToBigWig also uses. */
int zoomLevels = 0;
if (bedCount > 0)
    {
    zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
        bedWriteReducedOnceReturnReducedTwice, fieldCount,
        doCompress, indexOffset - dataOffset,
        usageList, resTryCount, resScales, resSizes,
        zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
    }

/* Write out extra indexes if need be. */
if (eim)
    {
    int i;
    for (i=0; i < eim->indexCount; ++i)
        {
        /* Remember where this index starts; it is recorded in the extra-index
         * list near the end of this function. */
        eim->fileOffsets[i] = ftell(f);
        /* maxBedNameSize is not declared in this function; it is set here before the
         * sort and is also passed as the key size below. */
        maxBedNameSize = eim->maxFieldSize[i];
        qsort(eim->chunkArrayArray[i], bedCount,
            sizeof(struct bbNamedFileChunk), bbNamedFileChunkCmpByName);
        assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0]));
        bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]),
            bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize, bbNamedFileChunkVal,
            sizeof(bits64) + sizeof(bits64), f);
        verboseTime(1, "Sorting and writing extra index %d", i);
        }
    }

/* Figure out buffer size needed for uncompression if need be.  It stays 0 when
 * the data is not compressed. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxBlockSize, maxZoomUncompSize);
    }

/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigBedSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits32 reserved32 = 0;
bits64 reserved64 = 0;

bits16 definedFieldCount = bedN;

/* Write fixed header; the assert checks it fills exactly the 64 bytes reserved by
 * bbiWriteDummyHeader(). */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, fieldCount);
writeOne(f, definedFieldCount);
writeOne(f, asOffset);
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, extHeaderOffset);
assert(ftell(f) == 64);

/* Write summary headers with data. */
int i;
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i<zoomLevels; ++i)
    {
    verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]);
    writeOne(f, zoomAmounts[i]);
    writeOne(f, reserved32);
    writeOne(f, zoomDataOffsets[i]);
    writeOne(f, zoomIndexOffsets[i]);
    }
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i<bbiMaxZoomLevels; ++i)
    {
    writeOne(f, reserved32);
    writeOne(f, reserved32);
    writeOne(f, reserved64);
    writeOne(f, reserved64);
    }

/* Write total summary over the zeroed copy written earlier. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);

/* Write extended header; the assert checks it comes to extHeaderSize bytes. */
fseek(f, extHeaderOffset, SEEK_SET);
writeOne(f, extHeaderSize);
writeOne(f, extraIndexCount);
writeOne(f, extraIndexListOffset);
repeatCharOut(f, 0, 52);        /* reserved */
assert(ftell(f) - extHeaderOffset == extHeaderSize);

/* Write extra index offsets if need be. */
if (extraIndexCount != 0)
    {
    fseek(f, extraIndexListOffset, SEEK_SET);
    int i;      /* shadows the outer i for the duration of this block */
    for (i=0; i<extraIndexCount; ++i)
        {
        /* Write out fixed part of index info */
        bits16 type = 0;    /* bPlusTree type */
        bits16 indexFieldCount = 1;
        writeOne(f, type);
        writeOne(f, indexFieldCount);
        writeOne(f, eim->fileOffsets[i]);
        repeatCharOut(f, 0, 4);  /* reserved */

        /* Write out field list - easy this time because for now always only one field. */
        bits16 fieldId = eim->indexFields[i];
        writeOne(f, fieldId);
        repeatCharOut(f, 0, 2); /* reserved */
        }
    /* The records must end exactly where the reserved region ended. */
    assert(ftell(f) == extraIndexListEndOffset);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up. */
lineFileClose(&lf);
carefulClose(&f);
freeHash(&chromSizesHash);
bbiChromUsageFreeList(&usageList);
asObjectFreeList(&as);
}