void bbiWriteChromInfo(struct bbiChromUsage *usageList, int blockSize, FILE *f)
/* Write out information on chromosomes to file. */
{
int chromCount = slCount(usageList);
struct bbiChromUsage *usage;

/* Allocate and fill in array from list. */
struct bbiChromInfo *chromInfoArray;
AllocArray(chromInfoArray, chromCount);
int i;
int maxChromNameSize = 0;
for (i=0, usage = usageList; i<chromCount; ++i, usage = usage->next)
    {
    char *chromName = usage->name;
    int len = strlen(chromName);
    if (len > maxChromNameSize)
        maxChromNameSize = len;
    chromInfoArray[i].name = chromName;
    chromInfoArray[i].id = usage->id;
    chromInfoArray[i].size = usage->size;
    }

/* Sort so the b-Tree actually works. */
qsort(chromInfoArray, chromCount, sizeof(chromInfoArray[0]), bbiChromInfoCmp);

/* Write chromosome bPlusTree */
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
    bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal,
    sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size), f);

freeMem(chromInfoArray);
}
void bptFileCreate(
    void *itemArray,    /* Sorted array of things to index. */
    int itemSize,       /* Size of each element in array. */
    bits64 itemCount,   /* Number of elements in array. */
    bits32 blockSize,   /* B+ tree block size - # of children for each node. */
    void (*fetchKey)(const void *va, char *keyBuf),  /* Given item, copy key to keyBuf */
    bits32 keySize,                                   /* Size of key */
    void* (*fetchVal)(const void *va),                /* Given item, return pointer to value */
    bits32 valSize,                                   /* Size of value */
    char *fileName)                                   /* Name of output file. */
/* Create a b+ tree index file from a sorted array. */
{
/* Open file and write header. */
FILE *f = mustOpen(fileName, "wb");
bptFileBulkIndexToOpenFile(itemArray, itemSize, itemCount, blockSize, fetchKey, keySize,
    fetchVal, valSize, f);
carefulClose(&f);
}
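/* A minimal usage sketch for bptFileCreate() over a small, pre-sorted name/value array.
 * The record type, callbacks, values, and output file name are all hypothetical; only the
 * callback signatures and the call itself follow the prototype above. Assumes the kent
 * source tree headers (common.h, bPlusTree.h) are on the include path. */

struct exampleNameVal
/* Hypothetical record: a fixed-size key plus a 32-bit value. */
    {
    char name[16];   /* Key, NUL-terminated, already sorted ascending. */
    bits32 val;      /* Value stored with the key in the leaves. */
    };

static void exampleNameKey(const void *va, char *keyBuf)
/* Copy a record's key into keyBuf (the tree writer sizes keyBuf to keySize). */
{
const struct exampleNameVal *nv = va;
strcpy(keyBuf, nv->name);
}

static void *exampleNameVal(const void *va)
/* Return a pointer to the value portion of a record. */
{
const struct exampleNameVal *nv = va;
return (void *)&nv->val;
}

static void exampleWriteBptIndex()
/* Write a tiny B+ tree index over three records to a hypothetical file. */
{
static struct exampleNameVal items[] = {
    {"chr1", 0}, {"chr2", 1}, {"chrX", 2},   /* Must already be sorted by key. */
    };
bptFileCreate(items, sizeof(items[0]), ArraySize(items), 256,
    exampleNameKey, sizeof(items[0].name), exampleNameVal, sizeof(items[0].val),
    "example.bpt");
}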
void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, int blockSize,
    int itemsPerSlot, boolean doCompress, boolean keepAllChromosomes, boolean fixedSummaries,
    char *fileName)
/* Create a bigWig file out of a sorted sectionList. */
{
bits64 sectionCount = slCount(sectionList);
FILE *f = mustOpen(fileName, "wb");
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = 0;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
bits64 dataOffset = 0, dataOffsetPos;
bits64 indexOffset = 0, indexOffsetPos;
bits64 chromTreeOffset = 0, chromTreeOffsetPos;
bits64 totalSummaryOffset = 0, totalSummaryOffsetPos;
bits32 uncompressBufSize = 0;
bits64 uncompressBufSizePos;
struct bbiSummary *reduceSummaries[10];
bits32 reductionAmounts[10];
bits64 reductionDataOffsetPos[10];
bits64 reductionDataOffsets[10];
bits64 reductionIndexOffsets[10];
int i;

/* Figure out chromosome ID's. */
struct bbiChromInfo *chromInfoArray;
int chromCount, maxChromNameSize;
if (keepAllChromosomes)
    bwgMakeAllChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
else
    bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);

if (fixedSummaries)
    bwgComputeFixedSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, reductionAmounts);
else
    bwgComputeDynamicSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, chromCount,
        reductionAmounts, doCompress);

/* Write fixed header. */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
chromTreeOffsetPos = ftell(f);
writeOne(f, chromTreeOffset);
dataOffsetPos = ftell(f);
writeOne(f, dataOffset);
indexOffsetPos = ftell(f);
writeOne(f, indexOffset);
writeOne(f, reserved16);  /* fieldCount */
writeOne(f, reserved16);  /* definedFieldCount */
writeOne(f, reserved64);  /* autoSqlOffset. */
totalSummaryOffsetPos = ftell(f);
writeOne(f, totalSummaryOffset);
uncompressBufSizePos = ftell(f);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64);  /* nameIndexOffset */
assert(ftell(f) == 64);

/* Write summary headers */
for (i=0; i<summaryCount; ++i)
    {
    writeOne(f, reductionAmounts[i]);
    writeOne(f, reserved32);
    reductionDataOffsetPos[i] = ftell(f);
    writeOne(f, reserved64);  // Fill in with data offset later
    writeOne(f, reserved64);  // Fill in with index offset later
    }

/* Write dummy summary */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write chromosome bPlusTree */
chromTreeOffset = ftell(f);
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
    bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal,
    sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size), f);

/* Write out data section count and sections themselves. */
dataOffset = ftell(f);
writeOne(f, sectionCount);
struct bwgSection *section;
for (section = sectionList; section != NULL; section = section->next)
    {
    bits32 uncSizeOne = bwgSectionWrite(section, doCompress, f);
    if (uncSizeOne > uncompressBufSize)
        uncompressBufSize = uncSizeOne;
    }

/* Write out index - creating a temporary array rather than list representation of
 * sections in the process. */
indexOffset = ftell(f);
struct bwgSection **sectionArray;
AllocArray(sectionArray, sectionCount);
for (section = sectionList, i=0; section != NULL; section = section->next, ++i)
    sectionArray[i] = section;
cirTreeFileBulkIndexToOpenFile(sectionArray, sizeof(sectionArray[0]), sectionCount, blockSize, 1,
    NULL, bwgSectionFetchKey, bwgSectionFetchOffset, indexOffset, f);
freez(&sectionArray);

/* Write out summary sections. */
verbose(2, "bwgCreate writing %d summaries\n", summaryCount);
for (i=0; i<summaryCount; ++i)
    {
    reductionDataOffsets[i] = ftell(f);
    reductionIndexOffsets[i] = bbiWriteSummaryAndIndex(reduceSummaries[i], blockSize, itemsPerSlot, doCompress, f);
    verbose(3, "wrote %d of data, %d of index on level %d\n",
        (int)(reductionIndexOffsets[i] - reductionDataOffsets[i]),
        (int)(ftell(f) - reductionIndexOffsets[i]), i);
    }

/* Calculate summary */
struct bbiSummary *sum = reduceSummaries[0];
if (sum != NULL)
    {
    totalSum.validCount = sum->validCount;
    totalSum.minVal = sum->minVal;
    totalSum.maxVal = sum->maxVal;
    totalSum.sumData = sum->sumData;
    totalSum.sumSquares = sum->sumSquares;
    for (sum = sum->next; sum != NULL; sum = sum->next)
        {
        totalSum.validCount += sum->validCount;
        if (sum->minVal < totalSum.minVal)
            totalSum.minVal = sum->minVal;
        if (sum->maxVal > totalSum.maxVal)
            totalSum.maxVal = sum->maxVal;
        totalSum.sumData += sum->sumData;
        totalSum.sumSquares += sum->sumSquares;
        }
    /* Write real summary */
    fseek(f, totalSummaryOffset, SEEK_SET);
    bbiSummaryElementWrite(f, &totalSum);
    }
else
    totalSummaryOffset = 0;  /* Edge case, no summary. */

/* Go back and fill in offsets properly in header. */
fseek(f, dataOffsetPos, SEEK_SET);
writeOne(f, dataOffset);
fseek(f, indexOffsetPos, SEEK_SET);
writeOne(f, indexOffset);
fseek(f, chromTreeOffsetPos, SEEK_SET);
writeOne(f, chromTreeOffset);
fseek(f, totalSummaryOffsetPos, SEEK_SET);
writeOne(f, totalSummaryOffset);

if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    if (maxZoomUncompSize > uncompressBufSize)
        uncompressBufSize = maxZoomUncompSize;
    fseek(f, uncompressBufSizePos, SEEK_SET);
    writeOne(f, uncompressBufSize);
    }

/* Also fill in offsets in zoom headers. */
for (i=0; i<summaryCount; ++i)
    {
    fseek(f, reductionDataOffsetPos[i], SEEK_SET);
    writeOne(f, reductionDataOffsets[i]);
    writeOne(f, reductionIndexOffsets[i]);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up */
freez(&chromInfoArray);
carefulClose(&f);
}
static void crTreeFileCreateLow(
    char **chromNames,      /* All chromosome (or contig) names */
    int chromCount,         /* Number of chromosomes. */
    void *itemArray,        /* Sorted array of things to index. */
    int itemSize,           /* Size of each element in array. */
    bits64 itemCount,       /* Number of elements in array. */
    bits32 blockSize,       /* R tree block size - # of children for each node. */
    bits32 itemsPerSlot,    /* Number of items to put in each index slot at lowest level. */
    struct crTreeRange (*fetchKey)(const void *va),  /* Given item, return key. */
    bits64 (*fetchOffset)(const void *va),           /* Given item, return file offset */
    bits64 initialDataOffset,                        /* Offset of 1st piece of data in file. */
    bits64 totalDataSize,                            /* Total size of data we are indexing. */
    char *fileName)         /* Name of output file. */
/* Create a r tree index file from an array of chromosomes and an array of items with
 * basic bed (chromosome,start,end) and file offset information. */
{
// uglyf("crTreeFileCreate %s itemCount=%llu, chromCount=%d\n", fileName, itemCount, chromCount);

/* Open file and write header. */
FILE *f = mustOpen(fileName, "wb");
bits32 magic = crTreeSig;
bits32 reserved32 = 0;
bits64 chromOffset = crHeaderSize;
bits64 cirOffset = 0;
bits64 reserved64 = 0;
writeOne(f, magic);
writeOne(f, reserved32);
writeOne(f, chromOffset);
bits64 cirOffsetPos = ftell(f);
writeOne(f, cirOffset);      /* Will fill this back in later */
writeOne(f, reserved64);
writeOne(f, reserved64);
writeOne(f, reserved64);
writeOne(f, reserved64);
writeOne(f, reserved64);

/* Convert array of chromosomes to a sorted array of name32s.  Also
 * figure out maximum chromosome name size. */
struct name32 *name32Array;
AllocArray(name32Array, chromCount);
bits32 chromIx;
int maxChromNameSize = 0;
for (chromIx=0; chromIx<chromCount; ++chromIx)
    {
    struct name32 *name32 = &name32Array[chromIx];
    char *name = chromNames[chromIx];
    name32->name = name;
    int nameSize = strlen(name);
    if (nameSize > maxChromNameSize)
        maxChromNameSize = nameSize;
    }
qsort(name32Array, chromCount, sizeof(name32Array[0]), name32Cmp);
for (chromIx=0; chromIx<chromCount; ++chromIx)
    {
    struct name32 *name32 = &name32Array[chromIx];
    name32->val = chromIx;
    }

/* Write out bPlusTree index of chromosome IDs. */
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(name32Array, sizeof(name32Array[0]), chromCount, chromBlockSize,
    name32Key, maxChromNameSize, name32Val, sizeof(name32Array[0].val), f);

/* Convert itemArray to ciItemArray.  This is mainly to avoid having to do the chromosome to
 * chromosome index conversion for each item.  The cost is some memory though.... */
struct ciItem *ciItemArray;
AllocArray(ciItemArray, itemCount);
bits64 itemIx;
char *itemPos = itemArray;
char *lastChrom = "";
bits32 lastChromIx = 0;
for (itemIx=0; itemIx < itemCount; ++itemIx)
    {
    struct ciItem *ciItem = &ciItemArray[itemIx];
    ciItem->item = itemPos;
    ciItem->key = (*fetchKey)(itemPos);
    if (!sameString(lastChrom, ciItem->key.chrom))
        {
        lastChrom = ciItem->key.chrom;
        lastChromIx = mustFindChromIx(lastChrom, name32Array, chromCount);
        }
    ciItem->chromIx = lastChromIx;
    itemPos += itemSize;
    }

/* Record starting position of r tree and write it out. */
cirOffset = ftell(f);
struct ciContext context;
ZeroVar(&context);
context.fetchKey = fetchKey;
context.fetchOffset = fetchOffset;
cirTreeFileBulkIndexToOpenFile(ciItemArray, sizeof(ciItemArray[0]), itemCount, blockSize, itemsPerSlot,
    &context, ciItemFetchKey, ciItemFetchOffset, totalDataSize, f);

/* Seek back and write offset to r tree. */
fseek(f, cirOffsetPos, SEEK_SET);
writeOne(f, cirOffset);

/* Clean up */
freez(&name32Array);
carefulClose(&f);
}
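/* A sketch of the two callbacks the r-tree builder above expects, for a hypothetical item
 * type that carries its own chrom/start/end and file offset. The struct crTreeRange field
 * names (chrom, start, end) follow their use in the code above; the item type, its fields,
 * and the callback names are illustrative, not part of the library. */

struct exampleItem
/* Hypothetical indexed item: a chromosome range plus where its data lives in the data file. */
    {
    char *chrom;        /* Chromosome or contig name. */
    bits32 start, end;  /* Half-open range on that chromosome. */
    bits64 fileOffset;  /* Offset of this item's data in the data file. */
    };

static struct crTreeRange exampleItemKey(const void *va)
/* Return the chromosome range an example item covers. */
{
const struct exampleItem *item = va;
struct crTreeRange range;
range.chrom = item->chrom;
range.start = item->start;
range.end = item->end;
return range;
}

static bits64 exampleItemOffset(const void *va)
/* Return where an example item's data starts in the data file. */
{
const struct exampleItem *item = va;
return item->fileOffset;
}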
void bbFileCreate(
    char *inName,     /* Input file in a tabular bed format <chrom><start><end> + whatever. */
    char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */
    int blockSize,    /* Number of items to bundle in r-tree.  1024 is good. */
    int itemsPerSlot, /* Number of items in lowest level of tree.  64 is good. */
    char *asText,     /* Field definitions in a string */
    struct asObject *as,  /* Field definitions parsed out */
    boolean doCompress,   /* If TRUE then compress data. */
    struct slName *extraIndexList,  /* List of extra indexes to add */
    char *outName)    /* BigBed output file name. */
/* Convert tab-separated bed file to binary indexed, zoomed bigBed version. */
{
/* Set up timing measures. */
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);

bits16 fieldCount = slCount(as->columnList);
bits16 extraIndexCount = slCount(extraIndexList);

struct bbExIndexMaker *eim = NULL;
if (extraIndexList != NULL)
    eim = bbExIndexMakerNew(extraIndexList, as);

/* Load in chromosome sizes. */
struct hash *chromSizesHash = NULL;
if (sizesIs2Bit)
    chromSizesHash = twoBitChromHash(chromSizes);
else
    chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "Read %d chromosomes and sizes from %s\n", chromSizesHash->elCount, chromSizes);

/* Do first pass, mostly just scanning file and counting hits per chromosome. */
int minDiff = 0;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, eim,
    &minDiff, &aveSize, &bedCount, tabSep);
verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList));
verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize);

/* Open output file and write dummy header. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);

/* Write out autoSql string */
bits64 asOffset = ftell(f);
mustWrite(f, asText, strlen(asText) + 1);
verbose(2, "as definition has %d columns\n", fieldCount);

/* Write out dummy total summary. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write out dummy header extension */
bits64 extHeaderOffset = ftell(f);
bits16 extHeaderSize = 64;
repeatCharOut(f, 0, extHeaderSize);

/* Write out extra index stuff if need be. */
bits64 extraIndexListOffset = 0;
bits64 extraIndexListEndOffset = 0;
if (extraIndexList != NULL)
    {
    extraIndexListOffset = ftell(f);
    int extraIndexSize = 16 + 4*1;    // Fixed record size 16, plus 1 times field size of 4
    repeatCharOut(f, 0, extraIndexSize*extraIndexCount);
    extraIndexListEndOffset = ftell(f);
    }

/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);

/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Write out primary full resolution data in sections, collect stats to use for reductions. */
bits64 dataOffset = ftell(f);
bits32 blockCount = 0;
bits32 maxBlockSize = 0;
struct bbiBoundsArray *boundsArray = NULL;
writeOne(f, bedCount);
if (bedCount > 0)
    {
    blockCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
    AllocArray(boundsArray, blockCount);
    lineFileRewind(lf);
    if (eim)
        bbExIndexMakerAllocChunkArrays(eim, bedCount);
    writeBlocks(usageList, lf, as, itemsPerSlot, boundsArray, blockCount, doCompress, f,
        resTryCount, resScales, resSizes, eim, bedCount, fieldCount, &maxBlockSize);
    }
verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)",
    (long long)bedCount, fieldCount);

/* Write out primary data index. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
    blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
    indexOffset, f);
freez(&boundsArray);
verboseTime(2, "index write");

/* Declare arrays and vars that track the zoom levels we actually output. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];

/* Call monster zoom maker library function that bedGraphToBigWig also uses. */
int zoomLevels = 0;
if (bedCount > 0)
    {
    zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
        bedWriteReducedOnceReturnReducedTwice, fieldCount,
        doCompress, indexOffset - dataOffset,
        usageList, resTryCount, resScales, resSizes,
        zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
    }

/* Write out extra indexes if need be. */
if (eim)
    {
    int i;
    for (i=0; i < eim->indexCount; ++i)
        {
        eim->fileOffsets[i] = ftell(f);
        int maxBedNameSize = eim->maxFieldSize[i];
        qsort(eim->chunkArrayArray[i], bedCount, sizeof(struct bbNamedFileChunk),
            bbNamedFileChunkCmpByName);
        assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0]));
        bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]),
            bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize, bbNamedFileChunkVal,
            sizeof(bits64) + sizeof(bits64), f);
        verboseTime(1, "Sorting and writing extra index %d", i);
        }
    }

/* Figure out buffer size needed for uncompression if need be. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxBlockSize, maxZoomUncompSize);
    }

/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigBedSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
bits16 definedFieldCount = bedN;

/* Write fixed header */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, fieldCount);
writeOne(f, definedFieldCount);
writeOne(f, asOffset);
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, extHeaderOffset);
assert(ftell(f) == 64);

/* Write summary headers with data. */
int i;
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i<zoomLevels; ++i)
    {
    verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]);
    writeOne(f, zoomAmounts[i]);
    writeOne(f, reserved32);
    writeOne(f, zoomDataOffsets[i]);
    writeOne(f, zoomIndexOffsets[i]);
    }
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i<bbiMaxZoomLevels; ++i)
    {
    writeOne(f, reserved32);
    writeOne(f, reserved32);
    writeOne(f, reserved64);
    writeOne(f, reserved64);
    }

/* Write total summary. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);

/* Write extended header */
fseek(f, extHeaderOffset, SEEK_SET);
writeOne(f, extHeaderSize);
writeOne(f, extraIndexCount);
writeOne(f, extraIndexListOffset);
repeatCharOut(f, 0, 52);    // reserved
assert(ftell(f) - extHeaderOffset == extHeaderSize);

/* Write extra index offsets if need be. */
if (extraIndexCount != 0)
    {
    fseek(f, extraIndexListOffset, SEEK_SET);
    int i;
    for (i=0; i<extraIndexCount; ++i)
        {
        // Write out fixed part of index info
        bits16 type = 0;    // bPlusTree type
        bits16 indexFieldCount = 1;
        writeOne(f, type);
        writeOne(f, indexFieldCount);
        writeOne(f, eim->fileOffsets[i]);
        repeatCharOut(f, 0, 4);    // reserved

        // Write out field list - easy this time because for now always only one field.
        bits16 fieldId = eim->indexFields[i];
        writeOne(f, fieldId);
        repeatCharOut(f, 0, 2);    // reserved
        }
    assert(ftell(f) == extraIndexListEndOffset);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up. */
lineFileClose(&lf);
carefulClose(&f);
freeHash(&chromSizesHash);
bbiChromUsageFreeList(&usageList);
asObjectFreeList(&as);
}
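/* A sketch of driving bbFileCreate() for a plain 3-column BED input, roughly what the
 * bedToBigBed command-line wrapper does. The file names and parameter values are
 * placeholders (blockSize 1024 and itemsPerSlot 64 follow the recommendations in the
 * parameter comments above); bedAsDef() and asParseText() are assumed to be available
 * from the kent library. Note that bbFileCreate() also reads the file-scope option
 * variables used above (sizesIs2Bit, tabSep, bedN) and frees the asObject itself. */

static void exampleBedToBigBed()
/* Convert a sorted bed3 file into a bigBed under assumed file names. */
{
char *asText = bedAsDef(3, 3);              /* autoSql definition for bed3. */
struct asObject *as = asParseText(asText);  /* Parsed field definitions. */
bbFileCreate("input.sorted.bed", "hg38.chrom.sizes",
    1024,    /* blockSize: items bundled per r-tree node. */
    64,      /* itemsPerSlot: items in lowest level of tree. */
    asText, as,
    TRUE,    /* doCompress */
    NULL,    /* no extra name indexes */
    "output.bb");
/* as is freed by bbFileCreate(); do not free it again here. */
}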
void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, int blockSize,
    int itemsPerSlot, boolean doCompress, char *fileName)
/* Create a bigWig file out of a sorted sectionList. */
{
bits64 sectionCount = slCount(sectionList);
FILE *f = mustOpen(fileName, "wb");
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = 0;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
bits64 dataOffset = 0, dataOffsetPos;
bits64 indexOffset = 0, indexOffsetPos;
bits64 chromTreeOffset = 0, chromTreeOffsetPos;
bits64 totalSummaryOffset = 0, totalSummaryOffsetPos;
bits32 uncompressBufSize = 0;
bits64 uncompressBufSizePos;
struct bbiSummary *reduceSummaries[10];
bits32 reductionAmounts[10];
bits64 reductionDataOffsetPos[10];
bits64 reductionDataOffsets[10];
bits64 reductionIndexOffsets[10];
int i;

/* Figure out chromosome ID's. */
struct bbiChromInfo *chromInfoArray;
int chromCount, maxChromNameSize;
bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);

/* Figure out initial summary level - starting with a summary 10 times the amount
 * of the smallest item.  See if summarized data is smaller than half input data, if
 * not bump up reduction by a factor of 2 until it is, or until further summarizing
 * yields no size reduction. */
int minRes = bwgAverageResolution(sectionList);
int initialReduction = minRes*10;
bits64 fullSize = bwgTotalSectionSize(sectionList);
bits64 maxReducedSize = fullSize/2;
struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
bits64 lastSummarySize = 0, summarySize;
for (;;)
    {
    summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction);
    bits64 summarySize = bbiTotalSummarySize(summaryList);
    if (doCompress)
        {
        summarySize *= 2;    // Compensate for summary not compressing as well as primary data
        }
    if (summarySize >= maxReducedSize && summarySize != lastSummarySize)
        {
        /* Need to do more reduction.  First scale reduction by amount that it missed
         * being small enough last time, with an extra 10% for good measure.  Then
         * just to keep from spinning through loop too many times, make sure this is
         * at least 2x the previous reduction. */
        int nextReduction = 1.1 * initialReduction * summarySize / maxReducedSize;
        if (nextReduction < initialReduction*2)
            nextReduction = initialReduction*2;
        initialReduction = nextReduction;
        bbiSummaryFreeList(&summaryList);
        lastSummarySize = summarySize;
        }
    else
        break;
    }
summaryCount = 1;
reduceSummaries[0] = firstSummaryList = summaryList;
reductionAmounts[0] = initialReduction;

/* Now calculate up to 10 levels of further summary. */
bits64 reduction = initialReduction;
for (i=0; i<ArraySize(reduceSummaries)-1; i++)
    {
    reduction *= 4;
    if (reduction > 1000000000)
        break;
    summaryList = bbiReduceSummaryList(reduceSummaries[summaryCount-1], chromInfoArray, reduction);
    summarySize = bbiTotalSummarySize(summaryList);
    if (summarySize != lastSummarySize)
        {
        reduceSummaries[summaryCount] = summaryList;
        reductionAmounts[summaryCount] = reduction;
        ++summaryCount;
        }
    int summaryItemCount = slCount(summaryList);
    if (summaryItemCount <= chromCount)
        break;
    }

/* Write fixed header. */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
chromTreeOffsetPos = ftell(f);
writeOne(f, chromTreeOffset);
dataOffsetPos = ftell(f);
writeOne(f, dataOffset);
indexOffsetPos = ftell(f);
writeOne(f, indexOffset);
writeOne(f, reserved16);  /* fieldCount */
writeOne(f, reserved16);  /* definedFieldCount */
writeOne(f, reserved64);  /* autoSqlOffset. */
totalSummaryOffsetPos = ftell(f);
writeOne(f, totalSummaryOffset);
uncompressBufSizePos = ftell(f);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64);  /* nameIndexOffset */
assert(ftell(f) == 64);

/* Write summary headers */
for (i=0; i<summaryCount; ++i)
    {
    writeOne(f, reductionAmounts[i]);
    writeOne(f, reserved32);
    reductionDataOffsetPos[i] = ftell(f);
    writeOne(f, reserved64);  // Fill in with data offset later
    writeOne(f, reserved64);  // Fill in with index offset later
    }

/* Write dummy summary */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write chromosome bPlusTree */
chromTreeOffset = ftell(f);
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
    bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal,
    sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size), f);

/* Write out data section count and sections themselves. */
dataOffset = ftell(f);
writeOne(f, sectionCount);
struct bwgSection *section;
for (section = sectionList; section != NULL; section = section->next)
    {
    bits32 uncSizeOne = bwgSectionWrite(section, doCompress, f);
    if (uncSizeOne > uncompressBufSize)
        uncompressBufSize = uncSizeOne;
    }

/* Write out index - creating a temporary array rather than list representation of
 * sections in the process. */
indexOffset = ftell(f);
struct bwgSection **sectionArray;
AllocArray(sectionArray, sectionCount);
for (section = sectionList, i=0; section != NULL; section = section->next, ++i)
    sectionArray[i] = section;
cirTreeFileBulkIndexToOpenFile(sectionArray, sizeof(sectionArray[0]), sectionCount, blockSize, 1,
    NULL, bwgSectionFetchKey, bwgSectionFetchOffset, indexOffset, f);
freez(&sectionArray);

/* Write out summary sections. */
verbose(2, "bwgCreate writing %d summaries\n", summaryCount);
for (i=0; i<summaryCount; ++i)
    {
    reductionDataOffsets[i] = ftell(f);
    reductionIndexOffsets[i] = bbiWriteSummaryAndIndex(reduceSummaries[i], blockSize, itemsPerSlot, doCompress, f);
    verbose(3, "wrote %d of data, %d of index on level %d\n",
        (int)(reductionIndexOffsets[i] - reductionDataOffsets[i]),
        (int)(ftell(f) - reductionIndexOffsets[i]), i);
    }

/* Calculate summary */
struct bbiSummary *sum = firstSummaryList;
if (sum != NULL)
    {
    totalSum.validCount = sum->validCount;
    totalSum.minVal = sum->minVal;
    totalSum.maxVal = sum->maxVal;
    totalSum.sumData = sum->sumData;
    totalSum.sumSquares = sum->sumSquares;
    for (sum = sum->next; sum != NULL; sum = sum->next)
        {
        totalSum.validCount += sum->validCount;
        if (sum->minVal < totalSum.minVal)
            totalSum.minVal = sum->minVal;
        if (sum->maxVal > totalSum.maxVal)
            totalSum.maxVal = sum->maxVal;
        totalSum.sumData += sum->sumData;
        totalSum.sumSquares += sum->sumSquares;
        }
    /* Write real summary */
    fseek(f, totalSummaryOffset, SEEK_SET);
    bbiSummaryElementWrite(f, &totalSum);
    }
else
    totalSummaryOffset = 0;  /* Edge case, no summary. */

/* Go back and fill in offsets properly in header. */
fseek(f, dataOffsetPos, SEEK_SET);
writeOne(f, dataOffset);
fseek(f, indexOffsetPos, SEEK_SET);
writeOne(f, indexOffset);
fseek(f, chromTreeOffsetPos, SEEK_SET);
writeOne(f, chromTreeOffset);
fseek(f, totalSummaryOffsetPos, SEEK_SET);
writeOne(f, totalSummaryOffset);

if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    if (maxZoomUncompSize > uncompressBufSize)
        uncompressBufSize = maxZoomUncompSize;
    fseek(f, uncompressBufSizePos, SEEK_SET);
    writeOne(f, uncompressBufSize);
    }

/* Also fill in offsets in zoom headers. */
for (i=0; i<summaryCount; ++i)
    {
    fseek(f, reductionDataOffsetPos[i], SEEK_SET);
    writeOne(f, reductionDataOffsets[i]);
    writeOne(f, reductionIndexOffsets[i]);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up */
freez(&chromInfoArray);
carefulClose(&f);
}
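/* A minimal sketch of the reduction-escalation rule used in the initial-summary sizing loop
 * above, pulled out as a standalone helper purely for illustration (not part of the library).
 * Given the reduction just tried and how large its summary came out relative to the size
 * target (half the primary data), it scales the reduction by the overshoot plus 10%, but
 * never by less than a factor of two. */

static int exampleNextReduction(int lastReduction, bits64 summarySize, bits64 maxReducedSize)
/* Pick the next, coarser reduction level to try. */
{
int nextReduction = 1.1 * lastReduction * summarySize / maxReducedSize;
if (nextReduction < lastReduction*2)
    nextReduction = lastReduction*2;
return nextReduction;
}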