Exemplo n.º 1
0
int bbiWriteZoomLevels(
    struct lineFile *lf,    /* Input file. */
    FILE *f,		    /* Output. */
    int blockSize,	    /* Size of index block */
    int itemsPerSlot,	    /* Number of data points bundled at lowest level. */
    bbiWriteReducedOnceReturnReducedTwice writeReducedOnceReturnReducedTwice,   /* callback */
    int fieldCount,	    /* Number of fields in bed (4 for bedGraph) */
    boolean doCompress,	    /* Do we compress.  Answer really should be yes! */
    bits64 dataSize,	    /* Size of data on disk (after compression if any). */
    struct bbiChromUsage *usageList, /* Result from bbiChromUsageFromBedFile */
    int resTryCount, int resScales[], int resSizes[],   /* How much to zoom at each level */
    bits32 zoomAmounts[bbiMaxZoomLevels],      /* Fills in amount zoomed at each level. */
    bits64 zoomDataOffsets[bbiMaxZoomLevels],  /* Fills in where data starts for each zoom level. */
    bits64 zoomIndexOffsets[bbiMaxZoomLevels], /* Fills in where index starts for each level. */
    struct bbiSummaryElement *totalSum)
/* Write out all the zoom levels and return the number of levels written (always at
 * least one).  Writes actual zoom amount and the offsets of the zoomed data and index
 * in the zoomAmounts, zoomDataOffsets and zoomIndexOffsets parameters.  The input file
 * is rewound before the callback is invoked, and totalSum is handed to the callback
 * unchanged.  Sorry for all the parameters - it was this or duplicate a big chunk of
 * code between bedToBigBed and bedGraphToBigWig. */
{
/* Write out first zoomed section while storing in memory next zoom level. */
assert(resTryCount > 0);

/* The size budget for the first zoom level is half the primary data.  It is kept in
 * 64 bits on purpose: dataSize is a bits64, and narrowing dataSize/2 to an int would
 * truncate it once the data reaches 4 GB, making the comparison below choose the wrong
 * resolution. */
bits64 maxReducedSize = dataSize/2;
int initialReduction = 0, initialReducedCount = 0;

/* Figure out initialReduction for zoom - the first candidate whose estimated size is
 * maxReducedSize or less. */
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    {
    /* Widen before multiplying so the byte estimate cannot wrap in a narrower type. */
    bits64 reducedSize = (bits64)resSizes[resTry] * sizeof(struct bbiSummaryOnDisk);
    if (doCompress)
	reducedSize /= 2;	// Estimate!
    if (reducedSize <= maxReducedSize)
	{
	initialReduction = resScales[resTry];
	initialReducedCount = resSizes[resTry];
	break;
	}
    }
verbose(2, "initialReduction %d, initialReducedCount = %d\n", 
    initialReduction, initialReducedCount);

/* Force there to always be at least one zoom.  It may waste a little space on small
 * files, but it makes files more uniform, and avoids special case code for calculating
 * overall file summary. */
if (initialReduction == 0)
    {
    initialReduction = resScales[0];
    initialReducedCount = resSizes[0];
    }

/* Call routine to make the initial zoom level and also a bit of work towards further levels.
 * The callback fills in the data and index offsets of level 0 and returns the list that
 * seeds the next level.  The local memory pool lm is passed to the callback and to
 * bbiSummarySimpleReduce, and is released in one go after the loop. */
struct lm *lm = lmInit(0);
int zoomIncrement = bbiResIncrement;
lineFileRewind(lf);
struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList, fieldCount,
	lf, initialReduction, initialReducedCount,
	zoomIncrement, blockSize, itemsPerSlot, doCompress, lm, 
	f, &zoomDataOffsets[0], &zoomIndexOffsets[0], totalSum);
verboseTime(2, "writeReducedOnceReturnReducedTwice");
zoomAmounts[0] = initialReduction;
int zoomLevels = 1;

/* Loop around to do any additional levels of zoom.  Stop when the output arrays are
 * full or when a further reduction no longer shrinks the number of summary items. */
int zoomCount = initialReducedCount;
int reduction = initialReduction * zoomIncrement;
while (zoomLevels < bbiMaxZoomLevels)
    {
    int rezoomCount = slCount(rezoomedList);
    if (rezoomCount >= zoomCount)
	break;
    zoomCount = rezoomCount;
    zoomDataOffsets[zoomLevels] = ftell(f);
    zoomIndexOffsets[zoomLevels] = bbiWriteSummaryAndIndex(rezoomedList, 
	blockSize, itemsPerSlot, doCompress, f);
    zoomAmounts[zoomLevels] = reduction;
    ++zoomLevels;
    /* Prepare the list for the next, coarser level. */
    reduction *= zoomIncrement;
    rezoomedList = bbiSummarySimpleReduce(rezoomedList, reduction, lm);
    }
lmCleanup(&lm);
verboseTime(2, "further reductions");
return zoomLevels;
}
Exemplo n.º 2
0
void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, 
	int blockSize, int itemsPerSlot, boolean doCompress, boolean keepAllChromosomes,
        boolean fixedSummaries, char *fileName)
/* Create a bigWig file out of a sorted sectionList.
 *   sectionList - sections to write; they are written in list order
 *   chromSizeHash - passed on to the routine that builds the chromosome table
 *   blockSize - block size for the chromosome tree, the section index and the zoom indexes
 *   itemsPerSlot - passed to bbiWriteSummaryAndIndex; also bounds the zoom uncompress buffer
 *   doCompress - whether sections and zoom summaries are written compressed
 *   keepAllChromosomes - selects bwgMakeAllChromInfo instead of bwgMakeChromInfo
 *   fixedSummaries - selects bwgComputeFixedSummaries instead of bwgComputeDynamicSummaries
 *   fileName - output file, opened with mustOpen in "wb" mode
 * The fixed header and the zoom headers are first written with placeholder offsets.
 * Once everything else is on disk the routine seeks back and patches in the real ones. */
{
bits64 sectionCount = slCount(sectionList);
FILE *f = mustOpen(fileName, "wb");
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = 0;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
/* Each xxxOffset is a value stored in the header, and xxxOffsetPos is the file position
 * of that header field so it can be rewritten later. */
bits64 dataOffset = 0, dataOffsetPos;
bits64 indexOffset = 0, indexOffsetPos;
bits64 chromTreeOffset = 0, chromTreeOffsetPos;
bits64 totalSummaryOffset = 0, totalSummaryOffsetPos;
bits32 uncompressBufSize = 0;
bits64 uncompressBufSizePos;
/* Per zoom level bookkeeping.  NOTE(review): the summary helpers below are presumed to
 * fill in at most 10 levels - confirm against their definitions. */
struct bbiSummary *reduceSummaries[10];
bits32 reductionAmounts[10];
bits64 reductionDataOffsetPos[10];
bits64 reductionDataOffsets[10];
bits64 reductionIndexOffsets[10];
int i;

/* Figure out chromosome ID's. */
struct bbiChromInfo *chromInfoArray;
int chromCount, maxChromNameSize;
if (keepAllChromosomes)
    bwgMakeAllChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
else
    bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);

/* Compute the zoom level summaries in memory; they are written after the primary data. */
if (fixedSummaries) 
    bwgComputeFixedSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, reductionAmounts);
else 
    bwgComputeDynamicSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, chromCount, reductionAmounts, doCompress);

/* Write fixed header.  Offsets that are not known yet are written as zero and their
 * positions remembered. */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
chromTreeOffsetPos = ftell(f);
writeOne(f, chromTreeOffset);
dataOffsetPos = ftell(f);
writeOne(f, dataOffset);
indexOffsetPos = ftell(f);
writeOne(f, indexOffset);
writeOne(f, reserved16);  /* fieldCount */
writeOne(f, reserved16);  /* definedFieldCount */
writeOne(f, reserved64);  /* autoSqlOffset. */
totalSummaryOffsetPos = ftell(f);
writeOne(f, totalSummaryOffset);
uncompressBufSizePos = ftell(f);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64);  /* nameIndexOffset */
assert(ftell(f) == 64);

/* Write summary headers */
for (i=0; i<summaryCount; ++i)
    {
    writeOne(f, reductionAmounts[i]);
    writeOne(f, reserved32);
    reductionDataOffsetPos[i] = ftell(f);
    writeOne(f, reserved64);	// Fill in with data offset later
    writeOne(f, reserved64);	// Fill in with index offset later
    }

/* Write dummy summary, reserving the space that the real totals overwrite later. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write chromosome bPlusTree */
chromTreeOffset = ftell(f);
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
    bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal, 
    sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size), 
    f);

/* Write out data section count and sections themselves, tracking the largest value
 * bwgSectionWrite returns for the uncompress buffer size in the header. */
dataOffset = ftell(f);
writeOne(f, sectionCount);
struct bwgSection *section;
for (section = sectionList; section != NULL; section = section->next)
    {
    bits32 uncSizeOne = bwgSectionWrite(section, doCompress, f);
    if (uncSizeOne > uncompressBufSize)
         uncompressBufSize = uncSizeOne;
    }

/* Write out index - creating a temporary array rather than list representation of
 * sections in the process. */
indexOffset = ftell(f);
struct bwgSection **sectionArray;
AllocArray(sectionArray, sectionCount);
for (section = sectionList, i=0; section != NULL; section = section->next, ++i)
    sectionArray[i] = section;
cirTreeFileBulkIndexToOpenFile(sectionArray, sizeof(sectionArray[0]), sectionCount,
    blockSize, 1, NULL, bwgSectionFetchKey, bwgSectionFetchOffset, 
    indexOffset, f);
freez(&sectionArray);

/* Write out summary sections. */
verbose(2, "bwgCreate writing %d summaries\n", summaryCount);
for (i=0; i<summaryCount; ++i)
    {
    reductionDataOffsets[i] = ftell(f);
    reductionIndexOffsets[i] = bbiWriteSummaryAndIndex(reduceSummaries[i], blockSize, itemsPerSlot, doCompress, f);
    verbose(3, "wrote %d of data, %d of index on level %d\n", (int)(reductionIndexOffsets[i] - reductionDataOffsets[i]), (int)(ftell(f) - reductionIndexOffsets[i]), i);
    }

/* Calculate summary over the whole file from the first zoom level.  reduceSummaries is
 * not initialized in this function, so only look at element 0 when a level was actually
 * reported through summaryCount. */
struct bbiSummary *sum = (summaryCount > 0) ? reduceSummaries[0] : NULL;
if (sum != NULL)
    {
    totalSum.validCount = sum->validCount;
    totalSum.minVal = sum->minVal;
    totalSum.maxVal = sum->maxVal;
    totalSum.sumData = sum->sumData;
    totalSum.sumSquares = sum->sumSquares;
    for (sum = sum->next; sum != NULL; sum = sum->next)
	{
	totalSum.validCount += sum->validCount;
	if (sum->minVal < totalSum.minVal) totalSum.minVal = sum->minVal;
	if (sum->maxVal > totalSum.maxVal) totalSum.maxVal = sum->maxVal;
	totalSum.sumData += sum->sumData;
	totalSum.sumSquares += sum->sumSquares;
	}
    /* Write real summary */
    fseek(f, totalSummaryOffset, SEEK_SET);
    bbiSummaryElementWrite(f, &totalSum);
    }
else
    totalSummaryOffset = 0;	/* Edge case, no summary. */

/* Go back and fill in offsets properly in header. */
fseek(f, dataOffsetPos, SEEK_SET);
writeOne(f, dataOffset);
fseek(f, indexOffsetPos, SEEK_SET);
writeOne(f, indexOffset);
fseek(f, chromTreeOffsetPos, SEEK_SET);
writeOne(f, chromTreeOffset);
fseek(f, totalSummaryOffsetPos, SEEK_SET);
writeOne(f, totalSummaryOffset);

if (doCompress)
    {
    /* The buffer size in the header must also cover one full slot of zoom data.  Use the
     * same unsigned type as uncompressBufSize so the comparison is not mixed-sign. */
    bits32 maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    if (maxZoomUncompSize > uncompressBufSize)
	uncompressBufSize = maxZoomUncompSize;
    fseek(f, uncompressBufSizePos, SEEK_SET);
    writeOne(f, uncompressBufSize);
    }

/* Also fill in offsets in zoom headers. */
for (i=0; i<summaryCount; ++i)
    {
    fseek(f, reductionDataOffsetPos[i], SEEK_SET);
    writeOne(f, reductionDataOffsets[i]);
    writeOne(f, reductionIndexOffsets[i]);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up */
freez(&chromInfoArray);
carefulClose(&f);
}
Exemplo n.º 3
0
void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, 
	int blockSize, int itemsPerSlot, boolean doCompress, char *fileName)
/* Create a bigWig file out of a sorted sectionList.
 *   sectionList - sections to write; they are written in list order
 *   chromSizeHash - passed on to bwgMakeChromInfo to build the chromosome table
 *   blockSize - block size for the chromosome tree, the section index and the zoom indexes
 *   itemsPerSlot - passed to bbiWriteSummaryAndIndex; also bounds the zoom uncompress buffer
 *   doCompress - whether sections and zoom summaries are written compressed
 *   fileName - output file, opened with mustOpen in "wb" mode
 * The fixed header and the zoom headers are first written with placeholder offsets.
 * Once everything else is on disk the routine seeks back and patches in the real ones. */
{
bits64 sectionCount = slCount(sectionList);
FILE *f = mustOpen(fileName, "wb");
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = 0;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
/* Each xxxOffset is a value stored in the header, and xxxOffsetPos is the file position
 * of that header field so it can be rewritten later. */
bits64 dataOffset = 0, dataOffsetPos;
bits64 indexOffset = 0, indexOffsetPos;
bits64 chromTreeOffset = 0, chromTreeOffsetPos;
bits64 totalSummaryOffset = 0, totalSummaryOffsetPos;
bits32 uncompressBufSize = 0;
bits64 uncompressBufSizePos;
struct bbiSummary *reduceSummaries[10];
bits32 reductionAmounts[10];
bits64 reductionDataOffsetPos[10];
bits64 reductionDataOffsets[10];
bits64 reductionIndexOffsets[10];
int i;

/* Figure out chromosome ID's. */
struct bbiChromInfo *chromInfoArray;
int chromCount, maxChromNameSize;
bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);

/* Figure out initial summary level - starting with a summary 10 times the amount
 * of the smallest item.  See if summarized data is smaller than half input data, if
 * not bump up reduction by a factor of 2 until it is, or until further summarizing
 * yields no size reduction. */
int  minRes = bwgAverageResolution(sectionList);
int initialReduction = minRes*10;
bits64 fullSize = bwgTotalSectionSize(sectionList);
bits64 maxReducedSize = fullSize/2;
struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
bits64 lastTrySize = 0;	/* Compensated size of the previous attempt in the loop below. */
for (;;)
    {
    summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction);
    bits64 trySize = bbiTotalSummarySize(summaryList);
    if (doCompress)
	{
        trySize *= 2;	// Compensate for summary not compressing as well as primary data
	}
    if (trySize >= maxReducedSize && trySize != lastTrySize)
        {
	/* Need to do more reduction.  First scale reduction by amount that it missed
	 * being small enough last time, with an extra 10% for good measure.  Then
	 * just to keep from spinning through loop too many times, make sure this is
	 * at least 2x the previous reduction. */
	int nextReduction = 1.1 * initialReduction * trySize / maxReducedSize;
	if (nextReduction < initialReduction*2)
	    nextReduction = initialReduction*2;
	initialReduction = nextReduction;
	bbiSummaryFreeList(&summaryList);
	lastTrySize = trySize;
	}
    else
        break;
    }
summaryCount = 1;
reduceSummaries[0] = firstSummaryList = summaryList;
reductionAmounts[0] = initialReduction;

/* Now calculate up to 10 levels of further summary.  A level is only kept when its
 * size differs from the last level that was kept.  lastSummarySize therefore starts at
 * the plain (not compression-compensated) size of the first level and is updated every
 * time a level is accepted, so the comparison is always like against like. */
bits64 lastSummarySize = bbiTotalSummarySize(firstSummaryList);
bits64 reduction = initialReduction;
for (i=0; i<ArraySize(reduceSummaries)-1; i++)
    {
    reduction *= 4;
    if (reduction > 1000000000)
        break;
    summaryList = bbiReduceSummaryList(reduceSummaries[summaryCount-1], chromInfoArray, 
    	reduction);
    bits64 summarySize = bbiTotalSummarySize(summaryList);
    /* Count before the list is possibly freed below. */
    int summaryItemCount = slCount(summaryList);
    if (summarySize != lastSummarySize)
        {
 	reduceSummaries[summaryCount] = summaryList;
	reductionAmounts[summaryCount] = reduction;
	++summaryCount;
	lastSummarySize = summarySize;
	}
    else
	{
	/* This level brought no reduction, so drop it instead of leaking it.
	 * NOTE(review): assumes bbiReduceSummaryList returns a freshly allocated list
	 * that may be freed the same way as the output of bwgReduceSectionList above. */
	bbiSummaryFreeList(&summaryList);
	}
    if (summaryItemCount <= chromCount)
        break;
    }

/* Write fixed header.  Offsets that are not known yet are written as zero and their
 * positions remembered. */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
chromTreeOffsetPos = ftell(f);
writeOne(f, chromTreeOffset);
dataOffsetPos = ftell(f);
writeOne(f, dataOffset);
indexOffsetPos = ftell(f);
writeOne(f, indexOffset);
writeOne(f, reserved16);  /* fieldCount */
writeOne(f, reserved16);  /* definedFieldCount */
writeOne(f, reserved64);  /* autoSqlOffset. */
totalSummaryOffsetPos = ftell(f);
writeOne(f, totalSummaryOffset);
uncompressBufSizePos = ftell(f);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64);  /* nameIndexOffset */
assert(ftell(f) == 64);

/* Write summary headers */
for (i=0; i<summaryCount; ++i)
    {
    writeOne(f, reductionAmounts[i]);
    writeOne(f, reserved32);
    reductionDataOffsetPos[i] = ftell(f);
    writeOne(f, reserved64);	// Fill in with data offset later
    writeOne(f, reserved64);	// Fill in with index offset later
    }

/* Write dummy summary, reserving the space that the real totals overwrite later. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write chromosome bPlusTree */
chromTreeOffset = ftell(f);
int chromBlockSize = min(blockSize, chromCount);
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
    bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal, 
    sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size), 
    f);

/* Write out data section count and sections themselves, tracking the largest value
 * bwgSectionWrite returns for the uncompress buffer size in the header. */
dataOffset = ftell(f);
writeOne(f, sectionCount);
struct bwgSection *section;
for (section = sectionList; section != NULL; section = section->next)
    {
    bits32 uncSizeOne = bwgSectionWrite(section, doCompress, f);
    if (uncSizeOne > uncompressBufSize)
         uncompressBufSize = uncSizeOne;
    }

/* Write out index - creating a temporary array rather than list representation of
 * sections in the process. */
indexOffset = ftell(f);
struct bwgSection **sectionArray;
AllocArray(sectionArray, sectionCount);
for (section = sectionList, i=0; section != NULL; section = section->next, ++i)
    sectionArray[i] = section;
cirTreeFileBulkIndexToOpenFile(sectionArray, sizeof(sectionArray[0]), sectionCount,
    blockSize, 1, NULL, bwgSectionFetchKey, bwgSectionFetchOffset, 
    indexOffset, f);
freez(&sectionArray);

/* Write out summary sections. */
verbose(2, "bwgCreate writing %d summaries\n", summaryCount);
for (i=0; i<summaryCount; ++i)
    {
    reductionDataOffsets[i] = ftell(f);
    reductionIndexOffsets[i] = bbiWriteSummaryAndIndex(reduceSummaries[i], blockSize, itemsPerSlot, doCompress, f);
    verbose(3, "wrote %d of data, %d of index on level %d\n", (int)(reductionIndexOffsets[i] - reductionDataOffsets[i]), (int)(ftell(f) - reductionIndexOffsets[i]), i);
    }

/* Calculate summary over the whole file from the first zoom level. */
struct bbiSummary *sum = firstSummaryList;
if (sum != NULL)
    {
    totalSum.validCount = sum->validCount;
    totalSum.minVal = sum->minVal;
    totalSum.maxVal = sum->maxVal;
    totalSum.sumData = sum->sumData;
    totalSum.sumSquares = sum->sumSquares;
    for (sum = sum->next; sum != NULL; sum = sum->next)
	{
	totalSum.validCount += sum->validCount;
	if (sum->minVal < totalSum.minVal) totalSum.minVal = sum->minVal;
	if (sum->maxVal > totalSum.maxVal) totalSum.maxVal = sum->maxVal;
	totalSum.sumData += sum->sumData;
	totalSum.sumSquares += sum->sumSquares;
	}
    /* Write real summary */
    fseek(f, totalSummaryOffset, SEEK_SET);
    bbiSummaryElementWrite(f, &totalSum);
    }
else
    totalSummaryOffset = 0;	/* Edge case, no summary. */

/* Go back and fill in offsets properly in header. */
fseek(f, dataOffsetPos, SEEK_SET);
writeOne(f, dataOffset);
fseek(f, indexOffsetPos, SEEK_SET);
writeOne(f, indexOffset);
fseek(f, chromTreeOffsetPos, SEEK_SET);
writeOne(f, chromTreeOffset);
fseek(f, totalSummaryOffsetPos, SEEK_SET);
writeOne(f, totalSummaryOffset);

if (doCompress)
    {
    /* The buffer size in the header must also cover one full slot of zoom data.  Use the
     * same unsigned type as uncompressBufSize so the comparison is not mixed-sign. */
    bits32 maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    if (maxZoomUncompSize > uncompressBufSize)
	uncompressBufSize = maxZoomUncompSize;
    fseek(f, uncompressBufSizePos, SEEK_SET);
    writeOne(f, uncompressBufSize);
    }

/* Also fill in offsets in zoom headers. */
for (i=0; i<summaryCount; ++i)
    {
    fseek(f, reductionDataOffsetPos[i], SEEK_SET);
    writeOne(f, reductionDataOffsets[i]);
    writeOne(f, reductionIndexOffsets[i]);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

/* Clean up.  The summary lists were built in this function and are not handed to the
 * caller, so release them here (same allocation assumption as noted in the loop above). */
for (i=0; i<summaryCount; ++i)
    bbiSummaryFreeList(&reduceSummaries[i]);
freez(&chromInfoArray);
carefulClose(&f);
}