void wigCorrelate(int inCount, char **inNames, char *outName)
/* wigCorrelate - Produce a table that correlates all pairs of wigs.
 * For every pair of inputs (earlier name, later name) one line is written
 * to outName:  <earlier name> tab <later name> tab <correlation>. */
{
FILE *out = mustOpen(outName, "w");
verboseTimeInit();

int firstIx = 0;
while (firstIx < inCount-1)
    {
    /* The first member of the pair stays open across the whole inner loop. */
    char *firstName = inNames[firstIx];
    struct metaWig *firstWig = metaWigOpen(firstName);
    verboseTime(2, "parsed %s into %p", firstName, firstWig);

    int secondIx;
    for (secondIx = firstIx+1; secondIx < inCount; ++secondIx)
	{
	/* The second member is reopened for each pair and closed right after. */
	char *secondName = inNames[secondIx];
	struct metaWig *secondWig = metaWigOpen(secondName);
	verboseTime(2, "parsed %s into %p", secondName, secondWig);

	/* Names are flushed out before the correlation is computed, so the
	 * pair being worked on is already visible in the output file. */
	fprintf(out, "%s\t%s\t", firstName, secondName);
	fflush(out);

	double correlation = correlatePair(firstWig, secondWig);
	fprintf(out, "%g\n", correlation);
	fflush(out);
	verboseTime(2, "correlated %g from %s and %s", correlation, firstName, secondName);

	metaWigClose(&secondWig);
	}

    metaWigClose(&firstWig);
    ++firstIx;
    }

carefulClose(&out);
}
void splatToEland(char *inName, char *outName) /* splatToEland - Convert from splat to eland format.. */ { verboseTimeInit(); struct splatAli *list = splatAliLoadAll(inName); verboseTime(1, "Loaded %d sequences from %s", slCount(list), inName); if (needSort(list)) { slSort(&list, splatAliCmpReadName); verboseTime(1, "Sorted by read name"); } FILE *f = mustOpen(outName, "w"); struct splatAli *el, *next; for (el = list; el != NULL; el = next) { int sameCount; findNextDifferent(el, &next, &sameCount); int bestScore, bestCount; splatAliLookForBest(el, next, &bestScore, &bestCount); int subCounts[3]; countSubsInList(el, next, subCounts); if (multi) elandMultiOutput(el, next, bestScore, bestCount, subCounts, f); else elandSingleOutput(el, next, bestScore, bestCount, subCounts, f); } }
int bbiWriteZoomLevels(
    struct lineFile *lf,                /* Input file. */
    FILE *f,                            /* Output. */
    int blockSize,                      /* Size of index block */
    int itemsPerSlot,                   /* Number of data points bundled at lowest level. */
    bbiWriteReducedOnceReturnReducedTwice writeReducedOnceReturnReducedTwice,   /* callback */
    int fieldCount,                     /* Number of fields in bed (4 for bedGraph) */
    boolean doCompress,                 /* Do we compress.  Answer really should be yes! */
    bits64 dataSize,                    /* Size of data on disk (after compression if any). */
    struct bbiChromUsage *usageList,    /* Result from bbiChromUsageFromBedFile */
    int resTryCount, int resScales[], int resSizes[],   /* How much to zoom at each level */
    bits32 zoomAmounts[bbiMaxZoomLevels],      /* Fills in amount zoomed at each level. */
    bits64 zoomDataOffsets[bbiMaxZoomLevels],  /* Fills in where data starts for each zoom level. */
    bits64 zoomIndexOffsets[bbiMaxZoomLevels], /* Fills in where index starts for each level. */
    struct bbiSummaryElement *totalSum)
/* Write out all the zoom levels and return the number of levels written. Writes
 * actual zoom amount and the offsets of the zoomed data and index in the last three
 * parameters.  Sorry for all the parameters - it was this or duplicate a big chunk of
 * code between bedToBigBed and bedGraphToBigWig. */
{
/* Write out first zoomed section while storing in memory next zoom level. */
assert(resTryCount > 0);

/* Keep this 64 bit like dataSize: an int would truncate for data larger than
 * about 4 Gb and make the comparison against reducedSize below meaningless. */
bits64 maxReducedSize = dataSize/2;
int initialReduction = 0, initialReducedCount = 0;

/* Figure out initialReduction for zoom - one that is maxReducedSize or less. */
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    {
    bits64 reducedSize = resSizes[resTry] * sizeof(struct bbiSummaryOnDisk);
    if (doCompress)
	reducedSize /= 2;	// Estimate!
    if (reducedSize <= maxReducedSize)
	{
	initialReduction = resScales[resTry];
	initialReducedCount = resSizes[resTry];
	break;
	}
    }
verbose(2, "initialReduction %d, initialReducedCount = %d\n",
    initialReduction, initialReducedCount);

/* Force there to always be at least one zoom.  It may waste a little space on small
 * files, but it makes files more uniform, and avoids special case code for calculating
 * overall file summary. */
if (initialReduction == 0)
    {
    initialReduction = resScales[0];
    initialReducedCount = resSizes[0];
    }

/* Call routine to make the initial zoom level and also a bit of work towards further levels. */
struct lm *lm = lmInit(0);
int zoomIncrement = bbiResIncrement;
lineFileRewind(lf);
struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList, fieldCount,
	lf, initialReduction, initialReducedCount,
	zoomIncrement, blockSize, itemsPerSlot, doCompress, lm,
	f, &zoomDataOffsets[0], &zoomIndexOffsets[0], totalSum);
verboseTime(2, "writeReducedOnceReturnReducedTwice");
zoomAmounts[0] = initialReduction;
int zoomLevels = 1;

/* Loop around to do any additional levels of zoom.  Stop as soon as a further
 * reduction no longer shrinks the number of summary items. */
int zoomCount = initialReducedCount;
int reduction = initialReduction * zoomIncrement;
while (zoomLevels < bbiMaxZoomLevels)
    {
    int rezoomCount = slCount(rezoomedList);
    if (rezoomCount >= zoomCount)
	break;
    zoomCount = rezoomCount;
    zoomDataOffsets[zoomLevels] = ftell(f);
    zoomIndexOffsets[zoomLevels] = bbiWriteSummaryAndIndex(rezoomedList,
	blockSize, itemsPerSlot, doCompress, f);
    zoomAmounts[zoomLevels] = reduction;
    ++zoomLevels;
    reduction *= zoomIncrement;
    rezoomedList = bbiSummarySimpleReduce(rezoomedList, reduction, lm);
    }
lmCleanup(&lm);
verboseTime(2, "further reductions");
return zoomLevels;
}
void itsaMake(int inCount, char *inputs[], char *output) /* itsaMake - Make a suffix array file out of input DNA sequences.. */ { verboseTimeInit(); bits64 maxGenomeSize = 1024LL*1024*1024*4; itsaBaseToValInit(); /* Load all DNA, make sure names are unique, and alphabetize by name. */ struct dnaSeq *seqList = NULL, *seq; struct hash *uniqSeqHash = hashNew(0); bits64 totalDnaSize = 1; /* FOr space between. */ int inputIx; for (inputIx=0; inputIx<inCount; ++inputIx) { char * input = inputs[inputIx]; struct dnaLoad *dl = dnaLoadOpen(input); while ((seq = dnaLoadNext(dl)) != NULL) { verbose(1, "read %s with %d bases\n", seq->name, seq->size); if (hashLookup(uniqSeqHash, seq->name)) errAbort("Input sequence name %s repeated, all must be unique.", seq->name); totalDnaSize += seq->size + 1; if (totalDnaSize > maxGenomeSize) errAbort("Too much DNA. Can only handle up to %lld bases", maxGenomeSize); slAddHead(&seqList, seq); } dnaLoadClose(&dl); } slSort(&seqList, dnaSeqCmpName); verboseTime(1, "Loaded %lld bases in %d sequences", totalDnaSize, slCount(seqList)); /* Allocate big buffer for all DNA. */ DNA *allDna = globalAllDna = needHugeMem(totalDnaSize); allDna[0] = 0; bits64 chromOffset = 1; /* Have zeroes between each chrom, and before and after. */ /* Copy DNA to a single big buffer, and create chromInfo on each sequence. */ struct chromInfo *chrom, *chromList = NULL; for (seq = seqList; seq != NULL; seq = seq->next) { AllocVar(chrom); chrom->name = cloneString(seq->name); chrom->size = seq->size; chrom->offset = chromOffset; slAddHead(&chromList, chrom); toUpperN(seq->dna, seq->size); memcpy(allDna + chromOffset, seq->dna, seq->size + 1); chromOffset += seq->size + 1; } slReverse(&chromList); /* Free up separate dna sequences because we're going to need a lot of RAM soon. */ /* Allocate index array, and offset and list arrays. 
*/ dnaSeqFreeList(&seqList); bits32 *index13; AllocArray(index13, itsaSlotCount); bits32 *offsetArray = needHugeMem(totalDnaSize * sizeof(bits32)); bits32 *listArray = needHugeZeroedMem(totalDnaSize * sizeof(bits32)); verboseTime(1, "Allocated buffers %lld bytes total", (long long)(9LL*totalDnaSize + itsaSlotCount*sizeof(bits32))); /* Where normally we'd keep some sort of structure with a next element to form a list * of matching positions in each slot of our index, to conserve memory we'll do this * with two parallel arrays. Because we're such cheapskates in terms of memory we'll * (and still using 9*genomeSize bytes of RAM) we'll use these arrays for two different * purposes. * In the first phase they will together be used to form linked lists of * offsets, and the 13mer index will point to the first item in each list. In this * phase the offsetArray contains offsets into the allDna structure, and the listArray * contains the next pointers for the list. After the first phase we write out the * suffix array to disk. * In the second phase we read the suffix array back into the offsetArray, and * use the listArray for the traverseArray. We write out the traverse array to finish * things up. */ /* Load up all DNA buffer. */ for (chrom = chromList; chrom != NULL; chrom = chrom->next) { verbose(2, " About to do first pass index\n"); indexChromPass1(chrom, allDna, offsetArray, listArray, index13); verbose(2, " Done first pass index\n"); } verboseTime(1, "Done big bucket sort"); slReverse(&chromList); itsaWriteMerged(chromList, allDna, offsetArray, listArray, index13, output); }
static void itsaWriteMerged(struct chromInfo *chromList, DNA *allDna,
	bits32 *offsetArray, bits32 *listArray, bits32 *index13, char *output)
/* Write out a file that contains a single splix that is the merger of
 * all of the individual splixes in list.   As a side effect will replace
 * offsetArray with suffix array and listArray with traverse array.
 * Sections are written in this order: header, chromosome names, chromosome
 * sizes, DNA, suffix array, traverse array, index13, cursors13.  The header
 * is written a second time at the end, once the magic number and final
 * sizes are known. */
{
FILE *f = mustOpen(output, "w+");

/** Allocate header and fill out easy constant fields. */
struct itsaFileHeader *header;
AllocVar(header);
header->majorVersion = ITSA_MAJOR_VERSION;
header->minorVersion = ITSA_MINOR_VERSION;

/* Figure out sizes of names and sequence for each chromosome. */
struct chromInfo *chrom;
bits32 chromNamesSize = 0;
bits64 dnaDiskSize = 1;	/* For initial zero. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
   {
   chromNamesSize += strlen(chrom->name) + 1;
   dnaDiskSize += chrom->size + 1;  /* Include separating zeroes. */
   }

/* Fill in  most of rest of header fields */
header->chromCount = slCount(chromList);
header->chromNamesSize = roundUpTo4(chromNamesSize);
header->dnaDiskSize = roundUpTo4(dnaDiskSize);
bits32 chromSizesSize = header->chromCount*sizeof(bits32);

/* Write header.  The magic field has not been set yet; it is only filled in
 * for the final rewrite at the bottom of this function. */
mustWrite(f, header, sizeof(*header));

/* Write chromosome names. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    mustWrite(f, chrom->name, strlen(chrom->name)+1);
zeroPad(f, header->chromNamesSize - chromNamesSize);

/* Write chromosome sizes. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    mustWrite(f, &chrom->size, sizeof(chrom->size));
/* As computed here the pad is always zero, since chromSizesSize is defined
 * just above as this very product. */
int chromSizesSizePad = chromSizesSize - header->chromCount * sizeof(bits32);
zeroPad(f, chromSizesSizePad);

/* Write out chromosome DNA and zeros before, between, and after. */
mustWrite(f, allDna, dnaDiskSize);
zeroPad(f, header->dnaDiskSize - dnaDiskSize);
verboseTime(1, "Wrote %lld bases of DNA including zero padding", header->dnaDiskSize);

/* Calculate and write suffix array. Convert index13 to index of array as opposed to index
 * of sequence. */
bits64 arraySize = 0;
off_t suffixArrayFileOffset = ftello(f);
int slotCount = itsaSlotCount;
int slotIx;
for (slotIx=0; slotIx < slotCount; ++slotIx)
    {
    int slotSize = finishAndWriteOneSlot(offsetArray, listArray, index13[slotIx], allDna, f);
    /* Convert index13 to hold the position in the suffix array where the first thing matching
     * the corresponding 13-base prefix is found. */
    if (slotSize != 0)
        index13[slotIx] = arraySize+1;	/* The +1 is so we can keep 0 for not found. */
    else
        index13[slotIx] = 0;
    arraySize += slotSize;
    if ((slotIx % 200000 == 0) && slotIx != 0)
	{
	verboseDot();
	if (slotIx % 10000000 == 0)
	    verbose(1, "fine sort bucket %d of %d\n", slotIx, slotCount);
	}
    }
verbose(1, "fine sort bucket %d of %d\n", slotCount, slotCount);
verboseTime(1, "Wrote %lld suffix array positions", arraySize);

/* Now we're done with the offsetArray and listArray buffers, so use them for the
 * next phase. */
bits32 *suffixArray = offsetArray;
offsetArray = NULL;	/* Help make some errors more obvious */
bits32 *traverseArray = listArray;
listArray = NULL;	/* Help make some errors more obvious */

/* Read the suffix array back from the file. */
fseeko(f, suffixArrayFileOffset, SEEK_SET);
mustRead(f, suffixArray, arraySize*sizeof(bits32));
verboseTime(1, "Read suffix array back in");

/* Calculate traverse array and cursor arrays */
memset(traverseArray, 0, arraySize*sizeof(bits32));
UBYTE *cursorArray = needHugeMem(arraySize);
itsaFillInTraverseArray(allDna, suffixArray, arraySize, traverseArray, cursorArray);
verboseTime(1, "Filled in traverseArray");

/* On an update ("w+") stream, output may not directly follow input without
 * an intervening positioning call.  Seeking zero bytes from the current
 * position satisfies that rule without moving the file position. */
fseeko(f, 0, SEEK_CUR);

/* Write out traverse array. */
mustWrite(f, traverseArray, arraySize*sizeof(bits32));
verboseTime(1, "Wrote out traverseArray");

/* Write out 13-mer index. */
mustWrite(f, index13, itsaSlotCount*sizeof(bits32));
verboseTime(1, "Wrote out index13");

/* Write out bits of cursor array corresponding to index.  The bytes are
 * gathered into a small buffer and pushed out with mustWrite() so a failed
 * write aborts here; the rewind() further down clears the stream's error
 * indicator and so could otherwise hide it. */
UBYTE cursorBuf[4096];
int cursorBufUsed = 0;
for (slotIx=0; slotIx<itsaSlotCount; ++slotIx)
    {
    bits32 indexPos = index13[slotIx];
    if (indexPos == 0)
	cursorBuf[cursorBufUsed] = 0;
    else
	cursorBuf[cursorBufUsed] = cursorArray[indexPos-1];
    ++cursorBufUsed;
    if (cursorBufUsed == (int)sizeof(cursorBuf))
	{
	mustWrite(f, cursorBuf, cursorBufUsed);
	cursorBufUsed = 0;
	}
    }
if (cursorBufUsed > 0)
    mustWrite(f, cursorBuf, cursorBufUsed);
verboseTime(1, "Wrote out cursors13");

/* Update a few fields in header, and go back and write it out again with
 * the correct magic number to indicate it's complete. */
header->magic = ITSA_MAGIC;
header->arraySize = arraySize;
header->size = sizeof(*header) 			// header
	+ header->chromNamesSize 		// chromosome names
	+ header->chromCount * sizeof(bits32)	// chromosome sizes
	+ header->dnaDiskSize 			// dna sequence
	+ sizeof(bits32) * arraySize  		// suffix array
	+ sizeof(bits32) * arraySize		// traverse array
	+ sizeof(bits32) * itsaSlotCount 	// index13
	+ sizeof(UBYTE) * itsaSlotCount;	// cursors13

rewind(f);
mustWrite(f, header, sizeof(*header));
carefulClose(&f);
verbose(1, "Completed %s is %lld bytes\n", output, header->size);
}
void bbFileCreate(
	char *inName, 	  /* Input file in a tabular bed format <chrom><start><end> + whatever. */
	char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */
	int blockSize,	  /* Number of items to bundle in r-tree.  1024 is good. */
	int itemsPerSlot, /* Number of items in lowest level of tree.  64 is good. */
	char *asText,	  /* Field definitions in a string */
	struct asObject *as,  /* Field definitions parsed out */
	boolean doCompress, /* If TRUE then compress data. */
	struct slName *extraIndexList,	/* List of extra indexes to add */
	char *outName)    /* BigBed output file name. */
/* Convert tab-separated bed file to binary indexed, zoomed bigBed version.
 * The output is laid down front to back with placeholder headers first; once
 * every section's offset is known the code seeks back and fills the
 * placeholders in.  Note that 'as' is freed before returning. */
{
/* Start the timer used by the verboseTime() progress messages. */
verboseTimeInit();
struct lineFile *bedLf = lineFileOpen(inName, TRUE);

bits16 fieldCount = slCount(as->columnList);
bits16 extraIndexCount = slCount(extraIndexList);
struct bbExIndexMaker *eim =
	(extraIndexList != NULL) ? bbExIndexMakerNew(extraIndexList, as) : NULL;

/* Chromosome sizes come either from a 2bit file or a two column text file. */
struct hash *chromSizesHash = sizesIs2Bit
	? twoBitChromHash(chromSizes)
	: bbiChromSizesFromFile(chromSizes);
verbose(2, "Read %d chromosomes and sizes from %s\n", chromSizesHash->elCount, chromSizes);

/* Pass 1 over the input: scan it and collect per-chromosome usage. */
int minDiff = 0;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(bedLf, chromSizesHash, eim,
	&minDiff, &aveSize, &bedCount, tabSep);
verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList));
verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize);

/* Create the output and reserve room for the fixed header and zoom headers. */
FILE *out = mustOpen(outName, "wb");
bbiWriteDummyHeader(out);
bbiWriteDummyZooms(out);

/* autoSql text, including its terminating zero. */
bits64 asOffset = ftell(out);
mustWrite(out, asText, strlen(asText) + 1);
verbose(2, "as definition has %d columns\n", fieldCount);

/* Placeholder total summary, rewritten near the end. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(out);
bbiSummaryElementWrite(out, &totalSum);

/* Placeholder extended header, rewritten near the end. */
bits64 extHeaderOffset = ftell(out);
bits16 extHeaderSize = 64;
repeatCharOut(out, 0, extHeaderSize);

/* Placeholder extra index list, only present when extra indexes were requested. */
bits64 extraIndexListOffset = 0;
bits64 extraIndexListEndOffset = 0;
if (extraIndexList != NULL)
    {
    int oneIndexSize = 16 + 4*1;   // Fixed record size 16, plus 1 times field size of 4
    extraIndexListOffset = ftell(out);
    repeatCharOut(out, 0, oneIndexSize*extraIndexCount);
    extraIndexListEndOffset = ftell(out);
    }

/* Chromosome/size database. */
bits64 chromTreeOffset = ftell(out);
bbiWriteChromInfo(usageList, blockSize, out);

/* Candidate initial reduction levels for the zoom code. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Pass 2: primary full resolution data in sections, collecting stats for reductions. */
bits64 dataOffset = ftell(out);
bits32 blockCount = 0;
bits32 maxBlockSize = 0;
struct bbiBoundsArray *boundsArray = NULL;
writeOne(out, bedCount);
if (bedCount > 0)
    {
    blockCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
    AllocArray(boundsArray, blockCount);
    lineFileRewind(bedLf);
    if (eim)
	bbExIndexMakerAllocChunkArrays(eim, bedCount);
    writeBlocks(usageList, bedLf, as, itemsPerSlot, boundsArray, blockCount, doCompress,
	    out, resTryCount, resScales, resSizes, eim, bedCount, fieldCount, &maxBlockSize);
    }
verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)",
	(long long)bedCount, fieldCount);

/* Index over the primary data. */
bits64 indexOffset = ftell(out);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
	blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
	indexOffset, out);
freez(&boundsArray);
verboseTime(2, "index write");

/* Zoom levels actually written; with no records none are written at all. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];
int zoomLevels = 0;
if (bedCount > 0)
    {
    /* Library routine shared with bedGraphToBigWig. */
    zoomLevels = bbiWriteZoomLevels(bedLf, out, blockSize, itemsPerSlot,
	    bedWriteReducedOnceReturnReducedTwice, fieldCount,
	    doCompress, indexOffset - dataOffset,
	    usageList, resTryCount, resScales, resSizes,
	    zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
    }

/* Extra indexes, each sorted by name and written as its own bulk index. */
if (eim)
    {
    int exIx;
    for (exIx = 0; exIx < eim->indexCount; ++exIx)
	{
	eim->fileOffsets[exIx] = ftell(out);
	maxBedNameSize = eim->maxFieldSize[exIx];
	qsort(eim->chunkArrayArray[exIx], bedCount,
		sizeof(struct bbNamedFileChunk), bbNamedFileChunkCmpByName);
	assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[exIx][0]));
	bptFileBulkIndexToOpenFile(eim->chunkArrayArray[exIx],
		sizeof(eim->chunkArrayArray[exIx][0]),
		bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize,
		bbNamedFileChunkVal, sizeof(bits64) + sizeof(bits64), out);
	verboseTime(1, "Sorting and writing extra index %d", exIx);
	}
    }

/* Buffer size a reader needs for uncompression; stays zero when not compressing. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxBlockSize, maxZoomUncompSize);
    }

/* Back to the start to overwrite the placeholder header with real values. */
rewind(out);
bits32 sig = bigBedSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
bits16 definedFieldCount = bedN;

/* Fixed header; the assert pins its size at 64 bytes. */
writeOne(out, sig);
writeOne(out, version);
writeOne(out, summaryCount);
writeOne(out, chromTreeOffset);
writeOne(out, dataOffset);
writeOne(out, indexOffset);
writeOne(out, fieldCount);
writeOne(out, definedFieldCount);
writeOne(out, asOffset);
writeOne(out, totalSummaryOffset);
writeOne(out, uncompressBufSize);
writeOne(out, extHeaderOffset);
assert(ftell(out) == 64);

/* Zoom headers that carry data. */
int zoomIx;
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (zoomIx = 0; zoomIx < zoomLevels; ++zoomIx)
    {
    verbose(3, "zoomAmounts[%d] = %d\n", zoomIx, (int)zoomAmounts[zoomIx]);
    writeOne(out, zoomAmounts[zoomIx]);
    writeOne(out, reserved32);
    writeOne(out, zoomDataOffsets[zoomIx]);
    writeOne(out, zoomIndexOffsets[zoomIx]);
    }
/* Remaining zoom headers are zero filled. */
for (zoomIx = zoomLevels; zoomIx < bbiMaxZoomLevels; ++zoomIx)
    {
    writeOne(out, reserved32);
    writeOne(out, reserved32);
    writeOne(out, reserved64);
    writeOne(out, reserved64);
    }

/* Real total summary over the placeholder. */
fseek(out, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(out, &totalSum);

/* Real extended header over the placeholder. */
fseek(out, extHeaderOffset, SEEK_SET);
writeOne(out, extHeaderSize);
writeOne(out, extraIndexCount);
writeOne(out, extraIndexListOffset);
repeatCharOut(out, 0, 52);	// reserved
assert(ftell(out) - extHeaderOffset == extHeaderSize);

/* Real extra index records over their placeholder, if there are any. */
if (extraIndexCount != 0)
    {
    fseek(out, extraIndexListOffset, SEEK_SET);
    int recIx;
    for (recIx = 0; recIx < extraIndexCount; ++recIx)
	{
	// Fixed part of index info
	bits16 type = 0;	// bPlusTree type
	bits16 indexFieldCount = 1;
	writeOne(out, type);
	writeOne(out, indexFieldCount);
	writeOne(out, eim->fileOffsets[recIx]);
	repeatCharOut(out, 0, 4);  // reserved

	// Field list - easy this time because for now always only one field.
	bits16 fieldId = eim->indexFields[recIx];
	writeOne(out, fieldId);
	repeatCharOut(out, 0, 2);  // reserved
	}
    assert(ftell(out) == extraIndexListEndOffset);
    }

/* End signature goes at the very end of the file. */
fseek(out, 0L, SEEK_END);
writeOne(out, sig);

/* Clean up. */
lineFileClose(&bedLf);
carefulClose(&out);
freeHash(&chromSizesHash);
bbiChromUsageFreeList(&usageList);
asObjectFreeList(&as);
}
void bedGraphToBigWig(char *inName, char *chromSizes, char *outName) /* bedGraphToBigWig - Convert a bedGraph program to bigWig.. */ { verboseTimeInit(); struct lineFile *lf = lineFileOpen(inName, TRUE); struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes); verbose(2, "%d chroms in %s\n", chromSizesHash->elCount, chromSizes); int minDiff = 0, i; double aveSize = 0; bits64 bedCount = 0; bits32 uncompressBufSize = 0; struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, NULL, &minDiff, &aveSize, &bedCount); verboseTime(2, "pass1"); verbose(2, "%d chroms in %s, minDiff=%d, aveSize=%g, bedCount=%lld\n", slCount(usageList), inName, minDiff, aveSize, bedCount); /* Write out dummy header, zoom offsets. */ FILE *f = mustOpen(outName, "wb"); bbiWriteDummyHeader(f); bbiWriteDummyZooms(f); /* Write out dummy total summary. */ struct bbiSummaryElement totalSum; ZeroVar(&totalSum); bits64 totalSummaryOffset = ftell(f); bbiSummaryElementWrite(f, &totalSum); /* Write out chromosome/size database. */ bits64 chromTreeOffset = ftell(f); bbiWriteChromInfo(usageList, blockSize, f); /* Set up to keep track of possible initial reduction levels. */ int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels]; int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes); /* Write out primary full resolution data in sections, collect stats to use for reductions. */ bits64 dataOffset = ftell(f); bits64 sectionCount = bbiCountSectionsNeeded(usageList, itemsPerSlot); writeOne(f, sectionCount); struct bbiBoundsArray *boundsArray; AllocArray(boundsArray, sectionCount); lineFileRewind(lf); bits32 maxSectionSize = 0; writeSections(usageList, lf, itemsPerSlot, boundsArray, sectionCount, f, resTryCount, resScales, resSizes, doCompress, &maxSectionSize); verboseTime(2, "pass2"); /* Write out primary data index. 
*/ bits64 indexOffset = ftell(f); cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), sectionCount, blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, indexOffset, f); verboseTime(2, "index write"); /* Declare arrays and vars that track the zoom levels we actually output. */ bits32 zoomAmounts[bbiMaxZoomLevels]; bits64 zoomDataOffsets[bbiMaxZoomLevels]; bits64 zoomIndexOffsets[bbiMaxZoomLevels]; /* Call monster zoom maker library function that bedToBigBed also uses. */ int zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot, bedGraphWriteReducedOnceReturnReducedTwice, 4, doCompress, indexOffset - dataOffset, usageList, resTryCount, resScales, resSizes, zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum); /* Figure out buffer size needed for uncompression if need be. */ if (doCompress) { int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk); uncompressBufSize = max(maxSectionSize, maxZoomUncompSize); } /* Go back and rewrite header. */ rewind(f); bits32 sig = bigWigSig; bits16 version = bbiCurrentVersion; bits16 summaryCount = zoomLevels; bits16 reserved16 = 0; bits32 reserved32 = 0; bits64 reserved64 = 0; /* Write fixed header */ writeOne(f, sig); writeOne(f, version); writeOne(f, summaryCount); writeOne(f, chromTreeOffset); writeOne(f, dataOffset); writeOne(f, indexOffset); writeOne(f, reserved16); // fieldCount writeOne(f, reserved16); // definedFieldCount writeOne(f, reserved64); // autoSqlOffset writeOne(f, totalSummaryOffset); writeOne(f, uncompressBufSize); writeOne(f, reserved64); // nameIndexOffset assert(ftell(f) == 64); /* Write summary headers with data. */ verbose(2, "Writing %d levels of zoom\n", zoomLevels); for (i=0; i<zoomLevels; ++i) { verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]); writeOne(f, zoomAmounts[i]); writeOne(f, reserved32); writeOne(f, zoomDataOffsets[i]); writeOne(f, zoomIndexOffsets[i]); } /* Write rest of summary headers with no data. 
*/ for (i=zoomLevels; i<bbiMaxZoomLevels; ++i) { writeOne(f, reserved32); writeOne(f, reserved32); writeOne(f, reserved64); writeOne(f, reserved64); } /* Write total summary. */ fseek(f, totalSummaryOffset, SEEK_SET); bbiSummaryElementWrite(f, &totalSum); /* Write end signature. */ fseek(f, 0L, SEEK_END); writeOne(f, sig); lineFileClose(&lf); carefulClose(&f); }