Exemplo n.º 1
0
static bits64 bbiWriteSummaryAndIndexUnc(struct bbiSummary *summaryList, 
	int blockSize, int itemsPerSlot, FILE *f)
/* Write out summary and index to summary compressed, returning start position of
 * summary index. */
{
bits32 i, count = slCount(summaryList);
struct bbiSummary **summaryArray;
AllocArray(summaryArray, count);
writeOne(f, count);
struct bbiSummary *summary;
for (summary = summaryList, i=0; summary != NULL; summary = summary->next, ++i)
    {
    summaryArray[i] = summary;
    summary->fileOffset = ftell(f);
    writeOne(f, summary->chromId);
    writeOne(f, summary->start);
    writeOne(f, summary->end);
    writeOne(f, summary->validCount);
    bbiWriteFloat(f, summary->minVal);
    bbiWriteFloat(f, summary->maxVal);
    bbiWriteFloat(f, summary->sumData);
    bbiWriteFloat(f, summary->sumSquares);
    }
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(summaryArray, sizeof(summaryArray[0]), count,
    blockSize, itemsPerSlot, NULL, bbiSummaryFetchKey, bbiSummaryFetchOffset, 
    indexOffset, f);
freez(&summaryArray);
return indexOffset;
}
Exemplo n.º 2
0
static bits64 bbiWriteSummaryAndIndexComp(struct bbiSummary *summaryList, 
	int blockSize, int itemsPerSlot, FILE *f)
/* Write out summary and index to summary uncompressed, returning start position of
 * summary index. */
{
bits32 i, count = slCount(summaryList);
struct bbiSummary **summaryArray;
AllocArray(summaryArray, count);
writeOne(f, count);
struct bbiSummary *summary = summaryList;

/* Figure out max size of uncompressed and compressed blocks. */
bits32 itemSize = sizeof(summary->chromId) + sizeof(summary->start) + sizeof(summary->end) + sizeof(summary->validCount) + 4*sizeof(float);
int uncBufSize = itemSize * itemsPerSlot;
char uncBuf[uncBufSize];
int compBufSize = zCompBufSize(uncBufSize);
char compBuf[compBufSize];

/* Loop through compressing and writing one slot at a time. */
bits32 itemsLeft = count;
int sumIx = 0;
while (itemsLeft > 0)
    {
    bits32 itemsInSlot = itemsLeft;
    if (itemsInSlot > itemsPerSlot)
         itemsInSlot = itemsPerSlot;
    char *writePt = uncBuf;

    bits64 filePos = ftell(f);
    for (i=0; i<itemsInSlot; ++i)
        {
	summaryArray[sumIx++] = summary;
	memWriteOne(&writePt, summary->chromId);
	memWriteOne(&writePt, summary->start);
	memWriteOne(&writePt, summary->end);
	memWriteOne(&writePt, summary->validCount);
	memWriteFloat(&writePt, summary->minVal);
	memWriteFloat(&writePt, summary->maxVal);
	memWriteFloat(&writePt, summary->sumData);
	memWriteFloat(&writePt, summary->sumSquares);
	summary->fileOffset = filePos;
	summary = summary->next;
	if (summary == NULL)
	    break;
	}

    bits32 uncSize = writePt - uncBuf;
    int compSize = zCompress(uncBuf, uncSize, compBuf, compBufSize);
    mustWrite(f, compBuf, compSize);

    itemsLeft -= itemsInSlot;
    }
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(summaryArray, sizeof(summaryArray[0]), count,
    blockSize, itemsPerSlot, NULL, bbiSummaryFetchKey, bbiSummaryFetchOffset, 
    indexOffset, f);
freez(&summaryArray);
return indexOffset;
}
Exemplo n.º 3
0
void cirTreeFileCreate(
	void *itemArray, 	/* Sorted array of things to index. */
	int itemSize, 		/* Size of each element in array. */
	bits64 itemCount, 	/* Number of elements in array. */
	bits32 blockSize,	/* R tree block size - # of children for each node. */
	bits32 itemsPerSlot,	/* Number of items to put in each index slot at lowest level. */
	void *context,		/* Context pointer for use by fetch call-back functions. */
	struct cirTreeRange (*fetchKey)(const void *va, void *context),/* Given item, return key. */
	bits64 (*fetchOffset)(const void *va, void *context), /* Given item, return file offset */
	bits64 endFileOffset,				 /* Last position in file we index. */
	char *fileName)                                  /* Name of output file. */
/* Create a r tree index file from a sorted array. */
{
FILE *f = mustOpen(fileName, "wb");
cirTreeFileBulkIndexToOpenFile(itemArray, itemSize, itemCount, blockSize, itemsPerSlot,
	context, fetchKey, fetchOffset, endFileOffset, f);
carefulClose(&f);
}
Exemplo n.º 4
0
void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, 
	int blockSize, int itemsPerSlot, boolean doCompress, boolean keepAllChromosomes,
        boolean fixedSummaries, char *fileName)
/* Create a bigWig file out of a sorted sectionList. */
{
bits64 sectionCount = slCount(sectionList);
FILE *f = mustOpen(fileName, "wb");
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = 0;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
bits64 dataOffset = 0, dataOffsetPos;
bits64 indexOffset = 0, indexOffsetPos;
bits64 chromTreeOffset = 0, chromTreeOffsetPos;
bits64 totalSummaryOffset = 0, totalSummaryOffsetPos;
bits32 uncompressBufSize = 0;
bits64 uncompressBufSizePos;
struct bbiSummary *reduceSummaries[10];
bits32 reductionAmounts[10];
bits64 reductionDataOffsetPos[10];
bits64 reductionDataOffsets[10];
bits64 reductionIndexOffsets[10];
int i;

/* Figure out chromosome ID's. */
struct bbiChromInfo *chromInfoArray;
int chromCount, maxChromNameSize;
if (keepAllChromosomes)
    bwgMakeAllChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
else
    bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);

if (fixedSummaries) 
    bwgComputeFixedSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, reductionAmounts);
else 
    bwgComputeDynamicSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, chromCount, reductionAmounts, doCompress);

/* Write fixed header. */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
chromTreeOffsetPos = ftell(f);
writeOne(f, chromTreeOffset);
dataOffsetPos = ftell(f);
writeOne(f, dataOffset);
indexOffsetPos = ftell(f);
writeOne(f, indexOffset);
writeOne(f, reserved16);  /* fieldCount */
writeOne(f, reserved16);  /* definedFieldCount */
writeOne(f, reserved64);  /* autoSqlOffset. */
totalSummaryOffsetPos = ftell(f);
writeOne(f, totalSummaryOffset);
uncompressBufSizePos = ftell(f);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64);  /* nameIndexOffset */
assert(ftell(f) == 64);

/* Write summary headers */
for (i=0; i<summaryCount; ++i)
    {
    writeOne(f, reductionAmounts[i]);
    writeOne(f, reserved32);
    reductionDataOffsetPos[i] = ftell(f);
    writeOne(f, reserved64);	// Fill in with data offset later
    writeOne(f, reserved64);	// Fill in with index offset later
    }

/* Write dummy summary */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write chromosome bPlusTree */
chromTreeOffset = ftell(f);
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
    bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal, 
    sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size), 
    f);

/* Write out data section count and sections themselves. */
dataOffset = ftell(f);
writeOne(f, sectionCount);
struct bwgSection *section;
for (section = sectionList; section != NULL; section = section->next)
    {
    bits32 uncSizeOne = bwgSectionWrite(section, doCompress, f);
    if (uncSizeOne > uncompressBufSize)
         uncompressBufSize = uncSizeOne;
    }

/* Write out index - creating a temporary array rather than list representation of
 * sections in the process. */
indexOffset = ftell(f);
struct bwgSection **sectionArray;
AllocArray(sectionArray, sectionCount);
for (section = sectionList, i=0; section != NULL; section = section->next, ++i)
    sectionArray[i] = section;
cirTreeFileBulkIndexToOpenFile(sectionArray, sizeof(sectionArray[0]), sectionCount,
    blockSize, 1, NULL, bwgSectionFetchKey, bwgSectionFetchOffset, 
    indexOffset, f);
freez(&sectionArray);

/* Write out summary sections. */
verbose(2, "bwgCreate writing %d summaries\n", summaryCount);
for (i=0; i<summaryCount; ++i)
    {
    reductionDataOffsets[i] = ftell(f);
    reductionIndexOffsets[i] = bbiWriteSummaryAndIndex(reduceSummaries[i], blockSize, itemsPerSlot, doCompress, f);
    verbose(3, "wrote %d of data, %d of index on level %d\n", (int)(reductionIndexOffsets[i] - reductionDataOffsets[i]), (int)(ftell(f) - reductionIndexOffsets[i]), i);
    }

/* Calculate summary */
struct bbiSummary *sum = reduceSummaries[0];
if (sum != NULL)
    {
    totalSum.validCount = sum->validCount;
    totalSum.minVal = sum->minVal;
    totalSum.maxVal = sum->maxVal;
    totalSum.sumData = sum->sumData;
    totalSum.sumSquares = sum->sumSquares;
    for (sum = sum->next; sum != NULL; sum = sum->next)
	{
	totalSum.validCount += sum->validCount;
	if (sum->minVal < totalSum.minVal) totalSum.minVal = sum->minVal;
	if (sum->maxVal > totalSum.maxVal) totalSum.maxVal = sum->maxVal;
	totalSum.sumData += sum->sumData;
	totalSum.sumSquares += sum->sumSquares;
	}
    /* Write real summary */
    fseek(f, totalSummaryOffset, SEEK_SET);
    bbiSummaryElementWrite(f, &totalSum);
    }
else
    totalSummaryOffset = 0;	/* Edge case, no summary. */

/* Go back and fill in offsets properly in header. */
fseek(f, dataOffsetPos, SEEK_SET);
writeOne(f, dataOffset);
fseek(f, indexOffsetPos, SEEK_SET);
writeOne(f, indexOffset);
fseek(f, chromTreeOffsetPos, SEEK_SET);
writeOne(f, chromTreeOffset);
fseek(f, totalSummaryOffsetPos, SEEK_SET);
writeOne(f, totalSummaryOffset);

if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    if (maxZoomUncompSize > uncompressBufSize)
	uncompressBufSize = maxZoomUncompSize;
    fseek(f, uncompressBufSizePos, SEEK_SET);
    writeOne(f, uncompressBufSize);
    }

/* Also fill in offsets in zoom headers. */
for (i=0; i<summaryCount; ++i)
    {
    fseek(f, reductionDataOffsetPos[i], SEEK_SET);
    writeOne(f, reductionDataOffsets[i]);
    writeOne(f, reductionIndexOffsets[i]);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up */
freez(&chromInfoArray);
carefulClose(&f);
}
Exemplo n.º 5
0
static void crTreeFileCreateLow(
	char **chromNames,	/* All chromosome (or contig) names */
	int chromCount,		/* Number of chromosomes. */
	void *itemArray, 	/* Sorted array of things to index. */
	int itemSize, 		/* Size of each element in array. */
	bits64 itemCount, 	/* Number of elements in array. */
	bits32 blockSize,	/* R tree block size - # of children for each node. */
	bits32 itemsPerSlot,	/* Number of items to put in each index slot at lowest level. */
	struct crTreeRange (*fetchKey)(const void *va),   /* Given item, return key. */
	bits64 (*fetchOffset)(const void *va), 		 /* Given item, return file offset */
	bits64 initialDataOffset,			 /* Offset of 1st piece of data in file. */
	bits64 totalDataSize,				 /* Total size of data we are indexing. */
	char *fileName)                                 /* Name of output file. */
/* Create a r tree index file from an array of chromosomes and an array of items with
 * basic bed (chromosome,start,end) and file offset information. */
{
// uglyf("crTreeFileCreate %s itemCount=%llu, chromCount=%d\n", fileName, itemCount, chromCount);
/* Open file and write header. */
FILE *f = mustOpen(fileName, "wb");
bits32 magic = crTreeSig;
bits32 reserved32 = 0;
bits64 chromOffset = crHeaderSize;
bits64 cirOffset = 0;
bits64 reserved64 = 0;
writeOne(f, magic);
writeOne(f, reserved32);
writeOne(f, chromOffset);
writeOne(f, cirOffset);	       /* Will fill this back in later */
writeOne(f, reserved64);
writeOne(f, reserved64);
writeOne(f, reserved64);
writeOne(f, reserved64);
writeOne(f, reserved64);

/* Convert array of chromosomes to a sorted array of name32s.  Also
 * figure out maximum chromosome name size. */
struct name32 *name32Array;
AllocArray(name32Array, chromCount);
bits32 chromIx;
int maxChromNameSize = 0;
for (chromIx=0; chromIx<chromCount; ++chromIx)
    {
    struct name32 *name32 = &name32Array[chromIx];
    char *name = chromNames[chromIx];
    name32->name = name;
    int nameSize = strlen(name);
    if (nameSize > maxChromNameSize)
        maxChromNameSize = nameSize;
    }
qsort(name32Array, chromCount, sizeof(name32Array[0]), name32Cmp);
for (chromIx=0; chromIx<chromCount; ++chromIx)
    {
    struct name32 *name32 = &name32Array[chromIx];
    name32->val = chromIx;
    }

/* Write out bPlusTree index of chromosome IDs. */
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(name32Array, sizeof(name32Array[0]), chromCount, chromBlockSize,
    name32Key, maxChromNameSize, name32Val, sizeof(name32Array[0].val), f);
	
/* Convert itemArray to ciItemArray.  This is mainly to avoid having to do the chromosome to
 * chromosome index conversion for each item.  The cost is some memory though.... */
struct ciItem *ciItemArray;
AllocArray(ciItemArray, itemCount);
bits64 itemIx;
char *itemPos = itemArray;
char *lastChrom = "";
bits32 lastChromIx = 0;
for (itemIx=0; itemIx < itemCount; ++itemIx)
    {
    struct ciItem *ciItem = &ciItemArray[itemIx];
    ciItem->item = itemPos;
    ciItem->key = (*fetchKey)(itemPos);
    if (!sameString(lastChrom, ciItem->key.chrom))
        {
	lastChrom = ciItem->key.chrom;
	lastChromIx = mustFindChromIx(lastChrom, name32Array, chromCount);
	}
    ciItem->chromIx = lastChromIx;
    itemPos += itemSize;
    }

/* Record starting position of r tree and write it out. */
cirOffset = ftell(f);
struct ciContext context;
ZeroVar(&context);
context.fetchKey = fetchKey;
context.fetchOffset = fetchOffset;
cirTreeFileBulkIndexToOpenFile(ciItemArray, sizeof(ciItemArray[0]), itemCount, blockSize, 
	itemsPerSlot, &context, ciItemFetchKey, ciItemFetchOffset, totalDataSize, f);

/* Seek back and write offset to r tree. */
fseek(f, cirOffsetPos, SEEK_SET);
writeOne(f, cirOffset);


/* Clean up */
freez(&name32Array);
carefulClose(&f);
}
Exemplo n.º 6
0
void bbFileCreate(
	char *inName, 	  /* Input file in a tabular bed format <chrom><start><end> + whatever. */
	char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */
	int blockSize,	  /* Number of items to bundle in r-tree.  1024 is good. */
	int itemsPerSlot, /* Number of items in lowest level of tree.  64 is good. */
	char *asText,	  /* Field definitions in a string */
	struct asObject *as,  /* Field definitions parsed out */
	boolean doCompress, /* If TRUE then compress data. */
	struct slName *extraIndexList,	/* List of extra indexes to add */
	char *outName)    /* BigBed output file name. */
/* Convert tab-separated bed file to binary indexed, zoomed bigBed version. */
{
/* Set up timing measures. */
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);

bits16 fieldCount = slCount(as->columnList);
bits16 extraIndexCount = slCount(extraIndexList);

struct bbExIndexMaker *eim = NULL;
if (extraIndexList != NULL)
    eim = bbExIndexMakerNew(extraIndexList, as);

/* Load in chromosome sizes. */
struct hash *chromSizesHash = NULL;

if (sizesIs2Bit)
    chromSizesHash = twoBitChromHash(chromSizes);
else
    chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "Read %d chromosomes and sizes from %s\n",  chromSizesHash->elCount, chromSizes);

/* Do first pass, mostly just scanning file and counting hits per chromosome. */
int minDiff = 0;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, eim, 
    &minDiff, &aveSize, &bedCount, tabSep);
verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList));
verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize);

/* Open output file and write dummy header. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);

/* Write out autoSql string */
bits64 asOffset = ftell(f);
mustWrite(f, asText, strlen(asText) + 1);
verbose(2, "as definition has %d columns\n", fieldCount);

/* Write out dummy total summary. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write out dummy header extension */
bits64 extHeaderOffset = ftell(f);
bits16 extHeaderSize = 64;
repeatCharOut(f, 0, extHeaderSize);

/* Write out extra index stuff if need be. */
bits64 extraIndexListOffset = 0;
bits64 extraIndexListEndOffset = 0;
if (extraIndexList != NULL)
    {
    extraIndexListOffset = ftell(f);
    int extraIndexSize = 16 + 4*1;   // Fixed record size 16, plus 1 times field size of 4 
    repeatCharOut(f, 0, extraIndexSize*extraIndexCount);
    extraIndexListEndOffset = ftell(f);
    }

/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);

/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Write out primary full resolution data in sections, collect stats to use for reductions. */
bits64 dataOffset = ftell(f);
bits32 blockCount = 0;
bits32 maxBlockSize = 0;
struct bbiBoundsArray *boundsArray = NULL;
writeOne(f, bedCount);
if (bedCount > 0)
    {
    blockCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
    AllocArray(boundsArray, blockCount);
    lineFileRewind(lf);
    if (eim)
	bbExIndexMakerAllocChunkArrays(eim, bedCount);
    writeBlocks(usageList, lf, as, itemsPerSlot, boundsArray, blockCount, doCompress,
	    f, resTryCount, resScales, resSizes, eim, bedCount, fieldCount, &maxBlockSize);
    }
verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)", 
	(long long)bedCount, fieldCount);

/* Write out primary data index. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
    blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, 
    indexOffset, f);
freez(&boundsArray);
verboseTime(2, "index write");

/* Declare arrays and vars that track the zoom levels we actually output. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];

/* Call monster zoom maker library function that bedGraphToBigWig also uses. */
int zoomLevels = 0;
if (bedCount > 0)
    {
    zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
	bedWriteReducedOnceReturnReducedTwice, fieldCount,
	doCompress, indexOffset - dataOffset, 
	usageList, resTryCount, resScales, resSizes, 
	zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
    }

/* Write out extra indexes if need be. */
if (eim)
    {
    int i;
    for (i=0; i < eim->indexCount; ++i)
        {
	eim->fileOffsets[i] = ftell(f);
	maxBedNameSize = eim->maxFieldSize[i];
	qsort(eim->chunkArrayArray[i], bedCount, 
	    sizeof(struct bbNamedFileChunk), bbNamedFileChunkCmpByName);
	assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0]));
	bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]), 
	    bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize, bbNamedFileChunkVal, 
	    sizeof(bits64) + sizeof(bits64), f);
	verboseTime(1, "Sorting and writing extra index %d", i);
	}
    }

/* Figure out buffer size needed for uncompression if need be. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxBlockSize, maxZoomUncompSize);
    }

/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigBedSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits32 reserved32 = 0;
bits64 reserved64 = 0;

bits16 definedFieldCount = bedN;

/* Write fixed header */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, fieldCount);
writeOne(f, definedFieldCount);
writeOne(f, asOffset);
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, extHeaderOffset);
assert(ftell(f) == 64);

/* Write summary headers with data. */
int i;
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i<zoomLevels; ++i)
    {
    verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]);
    writeOne(f, zoomAmounts[i]);
    writeOne(f, reserved32);
    writeOne(f, zoomDataOffsets[i]);
    writeOne(f, zoomIndexOffsets[i]);
    }
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i<bbiMaxZoomLevels; ++i)
    {
    writeOne(f, reserved32);
    writeOne(f, reserved32);
    writeOne(f, reserved64);
    writeOne(f, reserved64);
    }

/* Write total summary. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);

/* Write extended header */
fseek(f, extHeaderOffset, SEEK_SET);
writeOne(f, extHeaderSize);
writeOne(f, extraIndexCount);
writeOne(f, extraIndexListOffset);
repeatCharOut(f, 0, 52);    // reserved
assert(ftell(f) - extHeaderOffset == extHeaderSize);

/* Write extra index offsets if need be. */
if (extraIndexCount != 0)
    {
    fseek(f, extraIndexListOffset, SEEK_SET);
    int i;
    for (i=0; i<extraIndexCount; ++i)
        {
	// Write out fixed part of index info
	bits16 type = 0;    // bPlusTree type
	bits16 indexFieldCount = 1;
	writeOne(f, type);
	writeOne(f, indexFieldCount);
	writeOne(f, eim->fileOffsets[i]);
	repeatCharOut(f, 0, 4);  // reserved

	// Write out field list - easy this time because for now always only one field.
	bits16 fieldId = eim->indexFields[i];
	writeOne(f, fieldId);
	repeatCharOut(f, 0, 2); // reserved
	}
    assert(ftell(f) == extraIndexListEndOffset);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);


/* Clean up. */
lineFileClose(&lf);
carefulClose(&f);
freeHash(&chromSizesHash);
bbiChromUsageFreeList(&usageList);
asObjectFreeList(&as);
}
Exemplo n.º 7
0
static struct bbiSummary *bedWriteReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList, 
	int fieldCount, struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount, 
	int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress,
	struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
	struct bbiSummaryElement *totalSum)
/* Write out data reduced by factor of initialReduction.  Also calculate and keep in memory
 * next reduction level.  This is more work than some ways, but it keeps us from having to
 * keep the first reduction entirely in memory. */
{
struct bbiSummary *twiceReducedList = NULL;
bits32 doubleReductionSize = initialReduction * zoomIncrement;
struct bbiChromUsage *usage = usageList;
struct bbiBoundsArray *boundsArray, *boundsPt, *boundsEnd;
boundsPt = AllocArray(boundsArray, initialReductionCount);
boundsEnd = boundsPt + initialReductionCount;

*retDataStart = ftell(f);
writeOne(f, initialReductionCount);

/* This gets a little complicated I'm afraid.  The strategy is to:
 *   1) Build up a range tree that represents coverage depth on that chromosome
 *      This also has the nice side effect of getting rid of overlaps.
 *   2) Stream through the range tree, outputting the initial summary level and
 *      further reducing. 
 */
boolean firstTime = TRUE;
struct bbiSumOutStream *stream = bbiSumOutStreamOpen(itemsPerSlot, f, doCompress);
for (usage = usageList; usage != NULL; usage = usage->next)
    {
    struct bbiSummary oneSummary, *sum = NULL;
    struct rbTree *rangeTree = rangeTreeForBedChrom(lf, usage->name);
    struct range *range, *rangeList = rangeTreeList(rangeTree);
    for (range = rangeList; range != NULL; range = range->next)
        {
	/* Grab values we want from range. */
	double val = ptToInt(range->val);
	int start = range->start;
	int end = range->end;
	bits32 size = end - start;

	/* Add to total summary. */
	if (firstTime)
	    {
	    totalSum->validCount = size;
	    totalSum->minVal = totalSum->maxVal = val;
	    totalSum->sumData = val*size;
	    totalSum->sumSquares = val*val*size;
	    firstTime = FALSE;
	    }
	else
	    {
	    totalSum->validCount += size;
	    if (val < totalSum->minVal) totalSum->minVal = val;
	    if (val > totalSum->maxVal) totalSum->maxVal = val;
	    totalSum->sumData += val*size;
	    totalSum->sumSquares += val*val*size;
	    }

	/* If start past existing block then output it. */
	if (sum != NULL && sum->end <= start && sum->end < usage->size)
	    {
	    bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, 
		&boundsPt, boundsEnd, lm, stream);
	    sum = NULL;
	    }
	/* If don't have a summary we're working on now, make one. */
	if (sum == NULL)
	    {
	    oneSummary.chromId = usage->id;
	    oneSummary.start = start;
	    oneSummary.end = start + initialReduction;
	    if (oneSummary.end > usage->size) oneSummary.end = usage->size;
	    oneSummary.minVal = oneSummary.maxVal = val;
	    oneSummary.sumData = oneSummary.sumSquares = 0.0;
	    oneSummary.validCount = 0;
	    sum = &oneSummary;
	    }
	/* Deal with case where might have to split an item between multiple summaries.  This
	 * loop handles all but the final affected summary in that case. */
	while (end > sum->end)
	    {
	    /* Fold in bits that overlap with existing summary and output. */
	    int overlap = rangeIntersection(start, end, sum->start, sum->end);
	    assert(overlap > 0);
	    verbose(3, "Splitting size %d at %d, overlap %d\n", end - start, sum->end, overlap);
	    sum->validCount += overlap;
	    if (sum->minVal > val) sum->minVal = val;
	    if (sum->maxVal < val) sum->maxVal = val;
	    sum->sumData += val * overlap;
	    sum->sumSquares += val*val * overlap;
	    bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, 
		    &boundsPt, boundsEnd, lm, stream);
	    size -= overlap;

	    /* Move summary to next part. */
	    sum->start = start = sum->end;
	    sum->end = start + initialReduction;
	    if (sum->end > usage->size) sum->end = usage->size;
	    sum->minVal = sum->maxVal = val;
	    sum->sumData = sum->sumSquares = 0.0;
	    sum->validCount = 0;
	    }

	/* Add to summary. */
	sum->validCount += size;
	if (sum->minVal > val) sum->minVal = val;
	if (sum->maxVal < val) sum->maxVal = val;
	sum->sumData += val * size;
	sum->sumSquares += val*val * size;
	}
    if (sum != NULL)
	{
	bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, 
	    &boundsPt, boundsEnd, lm, stream);
	}
    rangeTreeFree(&rangeTree);
    }
bbiSumOutStreamClose(&stream);

/* Write out 1st zoom index. */
int indexOffset = *retIndexStart = ftell(f);
assert(boundsPt == boundsEnd);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), initialReductionCount,
    blockSize, itemsPerSlot, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, 
    indexOffset, f);

freez(&boundsArray);
slReverse(&twiceReducedList);
return twiceReducedList;
}
Exemplo n.º 8
0
void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, 
	int blockSize, int itemsPerSlot, boolean doCompress, char *fileName)
/* Create a bigWig file out of a sorted sectionList. */
{
bits64 sectionCount = slCount(sectionList);
FILE *f = mustOpen(fileName, "wb");
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = 0;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
bits64 dataOffset = 0, dataOffsetPos;
bits64 indexOffset = 0, indexOffsetPos;
bits64 chromTreeOffset = 0, chromTreeOffsetPos;
bits64 totalSummaryOffset = 0, totalSummaryOffsetPos;
bits32 uncompressBufSize = 0;
bits64 uncompressBufSizePos;
struct bbiSummary *reduceSummaries[10];
bits32 reductionAmounts[10];
bits64 reductionDataOffsetPos[10];
bits64 reductionDataOffsets[10];
bits64 reductionIndexOffsets[10];
int i;

/* Figure out chromosome ID's. */
struct bbiChromInfo *chromInfoArray;
int chromCount, maxChromNameSize;
bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);

/* Figure out initial summary level - starting with a summary 10 times the amount
 * of the smallest item.  See if summarized data is smaller than half input data, if
 * not bump up reduction by a factor of 2 until it is, or until further summarying
 * yeilds no size reduction. */
int  minRes = bwgAverageResolution(sectionList);
int initialReduction = minRes*10;
bits64 fullSize = bwgTotalSectionSize(sectionList);
bits64 maxReducedSize = fullSize/2;
struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
bits64 lastSummarySize = 0, summarySize;
for (;;)
    {
    summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction);
    bits64 summarySize = bbiTotalSummarySize(summaryList);
    if (doCompress)
	{
        summarySize *= 2;	// Compensate for summary not compressing as well as primary data
	}
    if (summarySize >= maxReducedSize && summarySize != lastSummarySize)
        {
	/* Need to do more reduction.  First scale reduction by amount that it missed
	 * being small enough last time, with an extra 10% for good measure.  Then
	 * just to keep from spinning through loop two many times, make sure this is
	 * at least 2x the previous reduction. */
	int nextReduction = 1.1 * initialReduction * summarySize / maxReducedSize;
	if (nextReduction < initialReduction*2)
	    nextReduction = initialReduction*2;
	initialReduction = nextReduction;
	bbiSummaryFreeList(&summaryList);
	lastSummarySize = summarySize;
	}
    else
        break;
    }
summaryCount = 1;
reduceSummaries[0] = firstSummaryList = summaryList;
reductionAmounts[0] = initialReduction;

/* Now calculate up to 10 levels of further summary. */
bits64 reduction = initialReduction;
for (i=0; i<ArraySize(reduceSummaries)-1; i++)
    {
    reduction *= 4;
    if (reduction > 1000000000)
        break;
    summaryList = bbiReduceSummaryList(reduceSummaries[summaryCount-1], chromInfoArray, 
    	reduction);
    summarySize = bbiTotalSummarySize(summaryList);
    if (summarySize != lastSummarySize)
        {
 	reduceSummaries[summaryCount] = summaryList;
	reductionAmounts[summaryCount] = reduction;
	++summaryCount;
	}
    int summaryItemCount = slCount(summaryList);
    if (summaryItemCount <= chromCount)
        break;
    }

/* Write fixed header. */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
chromTreeOffsetPos = ftell(f);
writeOne(f, chromTreeOffset);
dataOffsetPos = ftell(f);
writeOne(f, dataOffset);
indexOffsetPos = ftell(f);
writeOne(f, indexOffset);
writeOne(f, reserved16);  /* fieldCount */
writeOne(f, reserved16);  /* definedFieldCount */
writeOne(f, reserved64);  /* autoSqlOffset. */
totalSummaryOffsetPos = ftell(f);
writeOne(f, totalSummaryOffset);
uncompressBufSizePos = ftell(f);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64);  /* nameIndexOffset */
assert(ftell(f) == 64);

/* Write summary headers */
for (i=0; i<summaryCount; ++i)
    {
    writeOne(f, reductionAmounts[i]);
    writeOne(f, reserved32);
    reductionDataOffsetPos[i] = ftell(f);
    writeOne(f, reserved64);	// Fill in with data offset later
    writeOne(f, reserved64);	// Fill in with index offset later
    }

/* Write dummy summary */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write chromosome bPlusTree */
chromTreeOffset = ftell(f);
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
    bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal, 
    sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size), 
    f);

/* Write out data section count and sections themselves. */
dataOffset = ftell(f);
writeOne(f, sectionCount);
struct bwgSection *section;
for (section = sectionList; section != NULL; section = section->next)
    {
    bits32 uncSizeOne = bwgSectionWrite(section, doCompress, f);
    if (uncSizeOne > uncompressBufSize)
         uncompressBufSize = uncSizeOne;
    }

/* Write out index - creating a temporary array rather than list representation of
 * sections in the process. */
indexOffset = ftell(f);
struct bwgSection **sectionArray;
AllocArray(sectionArray, sectionCount);
for (section = sectionList, i=0; section != NULL; section = section->next, ++i)
    sectionArray[i] = section;
cirTreeFileBulkIndexToOpenFile(sectionArray, sizeof(sectionArray[0]), sectionCount,
    blockSize, 1, NULL, bwgSectionFetchKey, bwgSectionFetchOffset, 
    indexOffset, f);
freez(&sectionArray);

/* Write out summary sections. */
verbose(2, "bwgCreate writing %d summaries\n", summaryCount);
for (i=0; i<summaryCount; ++i)
    {
    reductionDataOffsets[i] = ftell(f);
    reductionIndexOffsets[i] = bbiWriteSummaryAndIndex(reduceSummaries[i], blockSize, itemsPerSlot, doCompress, f);
    verbose(3, "wrote %d of data, %d of index on level %d\n", (int)(reductionIndexOffsets[i] - reductionDataOffsets[i]), (int)(ftell(f) - reductionIndexOffsets[i]), i);
    }

/* Calculate summary */
struct bbiSummary *sum = firstSummaryList;
if (sum != NULL)
    {
    totalSum.validCount = sum->validCount;
    totalSum.minVal = sum->minVal;
    totalSum.maxVal = sum->maxVal;
    totalSum.sumData = sum->sumData;
    totalSum.sumSquares = sum->sumSquares;
    for (sum = sum->next; sum != NULL; sum = sum->next)
	{
	totalSum.validCount += sum->validCount;
	if (sum->minVal < totalSum.minVal) totalSum.minVal = sum->minVal;
	if (sum->maxVal > totalSum.maxVal) totalSum.maxVal = sum->maxVal;
	totalSum.sumData += sum->sumData;
	totalSum.sumSquares += sum->sumSquares;
	}
    /* Write real summary */
    fseek(f, totalSummaryOffset, SEEK_SET);
    bbiSummaryElementWrite(f, &totalSum);
    }
else
    totalSummaryOffset = 0;	/* Edge case, no summary. */

/* Go back and fill in offsets properly in header. */
fseek(f, dataOffsetPos, SEEK_SET);
writeOne(f, dataOffset);
fseek(f, indexOffsetPos, SEEK_SET);
writeOne(f, indexOffset);
fseek(f, chromTreeOffsetPos, SEEK_SET);
writeOne(f, chromTreeOffset);
fseek(f, totalSummaryOffsetPos, SEEK_SET);
writeOne(f, totalSummaryOffset);

if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    if (maxZoomUncompSize > uncompressBufSize)
	uncompressBufSize = maxZoomUncompSize;
    fseek(f, uncompressBufSizePos, SEEK_SET);
    writeOne(f, uncompressBufSize);
    }

/* Also fill in offsets in zoom headers. */
for (i=0; i<summaryCount; ++i)
    {
    fseek(f, reductionDataOffsetPos[i], SEEK_SET);
    writeOne(f, reductionDataOffsets[i]);
    writeOne(f, reductionIndexOffsets[i]);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up */
freez(&chromInfoArray);
carefulClose(&f);
}
Exemplo n.º 9
0
void bedGraphToBigWig(char *inName, char *chromSizes, char *outName)
/* bedGraphToBigWig - Convert a bedGraph program to bigWig.. */
{
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);
struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "%d chroms in %s\n", chromSizesHash->elCount, chromSizes);
int minDiff = 0, i;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, NULL, 
    &minDiff, &aveSize, &bedCount);
verboseTime(2, "pass1");
verbose(2, "%d chroms in %s, minDiff=%d, aveSize=%g, bedCount=%lld\n", 
    slCount(usageList), inName, minDiff, aveSize, bedCount);

/* Write out dummy header, zoom offsets. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);

/* Write out dummy total summary. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);

/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Write out primary full resolution data in sections, collect stats to use for reductions. */
bits64 dataOffset = ftell(f);
bits64 sectionCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
writeOne(f, sectionCount);
struct bbiBoundsArray *boundsArray;
AllocArray(boundsArray, sectionCount);
lineFileRewind(lf);
bits32 maxSectionSize = 0;
writeSections(usageList, lf, itemsPerSlot, boundsArray, sectionCount, f,
	resTryCount, resScales, resSizes, doCompress, &maxSectionSize);
verboseTime(2, "pass2");

/* Write out primary data index. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), sectionCount,
    blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, 
    indexOffset, f);
verboseTime(2, "index write");

/* Declare arrays and vars that track the zoom levels we actually output. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];

/* Call monster zoom maker library function that bedToBigBed also uses. */
int zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
    bedGraphWriteReducedOnceReturnReducedTwice, 4,
    doCompress, indexOffset - dataOffset, 
    usageList, resTryCount, resScales, resSizes, 
    zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);


/* Figure out buffer size needed for uncompression if need be. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxSectionSize, maxZoomUncompSize);
    }

/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;

/* Write fixed header */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, reserved16);	// fieldCount
writeOne(f, reserved16);	// definedFieldCount
writeOne(f, reserved64);	// autoSqlOffset
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64);	// nameIndexOffset
assert(ftell(f) == 64);

/* Write summary headers with data. */
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i<zoomLevels; ++i)
    {
    verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]);
    writeOne(f, zoomAmounts[i]);
    writeOne(f, reserved32);
    writeOne(f, zoomDataOffsets[i]);
    writeOne(f, zoomIndexOffsets[i]);
    }
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i<bbiMaxZoomLevels; ++i)
    {
    writeOne(f, reserved32);
    writeOne(f, reserved32);
    writeOne(f, reserved64);
    writeOne(f, reserved64);
    }

/* Write total summary. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

lineFileClose(&lf);
carefulClose(&f);
}
Exemplo n.º 10
0
struct bbiSummary *bedGraphWriteReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList, 
	int fieldCount, struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount, 
	int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress,
	struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
	struct bbiSummaryElement *totalSum)
/* Write out data reduced by factor of initialReduction.  Also calculate and keep in memory
 * next reduction level.  This is more work than some ways, but it keeps us from having to
 * keep the first reduction entirely in memory. */
{
struct bbiSummary *twiceReducedList = NULL;
bits32 doubleReductionSize = initialReduction * zoomIncrement;
struct bbiChromUsage *usage = usageList;
struct bbiSummary oneSummary, *sum = NULL;
struct bbiBoundsArray *boundsArray, *boundsPt, *boundsEnd;
boundsPt = AllocArray(boundsArray, initialReductionCount);
boundsEnd = boundsPt + initialReductionCount;

*retDataStart = ftell(f);
writeOne(f, initialReductionCount);
boolean firstRow = TRUE;

struct bbiSumOutStream *stream = bbiSumOutStreamOpen(itemsPerSlot, f, doCompress);

/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);

for (;;)
    {
    /* Get next line of input if any. */
    char *row[5];
    int rowSize = lineFileChopNext(lf, row, ArraySize(row));

    /* Output last section and break if at end of file. */
    if (rowSize == 0 && sum != NULL)
	{
	bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, 
		&boundsPt, boundsEnd, lm, stream);
	break;
	}

    /* Parse out row. */
    char *chrom = row[0];
    bits32 start = sqlUnsigned(row[1]);
    bits32 end = sqlUnsigned(row[2]);
    float val = sqlFloat(row[3]);

    /* Update total summary stuff. */
    bits32 size = end-start;
    if (firstRow)
	{
        totalSum->validCount = size;
	totalSum->minVal = totalSum->maxVal = val;
	totalSum->sumData = val*size;
	totalSum->sumSquares = val*val*size;
	firstRow = FALSE;
	}
    else
        {
	totalSum->validCount += size;
	if (val < totalSum->minVal) totalSum->minVal = val;
	if (val > totalSum->maxVal) totalSum->maxVal = val;
	totalSum->sumData += val*size;
	totalSum->sumSquares += val*val*size;
	}

    /* If new chromosome output existing block. */
    if (differentString(chrom, usage->name))
        {
	usage = usage->next;
	bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
		&boundsPt, boundsEnd, lm, stream);
	sum = NULL;
	}

    /* If start past existing block then output it. */
    else if (sum != NULL && sum->end <= start)
	{
	bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, 
		&boundsPt, boundsEnd, lm, stream);
	sum = NULL;
	}

    /* If don't have a summary we're working on now, make one. */
    if (sum == NULL)
        {
	oneSummary.chromId = usage->id;
	oneSummary.start = start;
	oneSummary.end = start + initialReduction;
	if (oneSummary.end > usage->size) oneSummary.end = usage->size;
	oneSummary.minVal = oneSummary.maxVal = val;
	oneSummary.sumData = oneSummary.sumSquares = 0.0;
	oneSummary.validCount = 0;
	sum = &oneSummary;
	}
    
    /* Deal with case where might have to split an item between multiple summaries.  This
     * loop handles all but the final affected summary in that case. */
    while (end > sum->end)
        {
	verbose(3, "Splitting start %d, end %d, sum->start %d, sum->end %d\n", start, end, sum->start, sum->end);
	/* Fold in bits that overlap with existing summary and output. */
	bits32 overlap = rangeIntersection(start, end, sum->start, sum->end);
	sum->validCount += overlap;
	if (sum->minVal > val) sum->minVal = val;
	if (sum->maxVal < val) sum->maxVal = val;
	sum->sumData += val * overlap;
	sum->sumSquares += val*val * overlap;
	bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, 
		&boundsPt, boundsEnd, lm, stream);
	size -= overlap;

	/* Move summary to next part. */
	sum->start = start = sum->end;
	sum->end = start + initialReduction;
	if (sum->end > usage->size) sum->end = usage->size;
	sum->minVal = sum->maxVal = val;
	sum->sumData = sum->sumSquares = 0.0;
	sum->validCount = 0;
	}

    /* Add to summary. */
    sum->validCount += size;
    if (sum->minVal > val) sum->minVal = val;
    if (sum->maxVal < val) sum->maxVal = val;
    sum->sumData += val * size;
    sum->sumSquares += val*val * size;
    }
bbiSumOutStreamClose(&stream);

/* Write out 1st zoom index. */
int indexOffset = *retIndexStart = ftell(f);
assert(boundsPt == boundsEnd);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), initialReductionCount,
    blockSize, itemsPerSlot, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, 
    indexOffset, f);

freez(&boundsArray);
slReverse(&twiceReducedList);
return twiceReducedList;
}