Example #1
0
void wigCorrelate(int inCount, char **inNames, char *outName)
/* wigCorrelate - Correlate every unordered pair of the inCount wig files named
 * in inNames.  One line per pair is written to outName: the two file names and
 * the value returned by correlatePair, separated by tabs. */
{
FILE *out = mustOpen(outName, "w");
int first, second;

verboseTimeInit();
for (first = 0; first + 1 < inCount; first++)
    {
    /* The first member of the pair stays open across the whole inner loop. */
    char *firstName = inNames[first];
    struct metaWig *firstWig = metaWigOpen(firstName);
    verboseTime(2, "parsed %s into %p", firstName, firstWig);

    for (second = first + 1; second < inCount; second++)
        {
        /* The second member is reopened for every pair it takes part in. */
        char *secondName = inNames[second];
        struct metaWig *secondWig = metaWigOpen(secondName);
        double correlation;

        verboseTime(2, "parsed %s into %p", secondName, secondWig);

        /* The names are written and flushed before correlatePair runs. */
        fprintf(out, "%s\t%s\t", firstName, secondName);
        fflush(out);

        correlation = correlatePair(firstWig, secondWig);
        fprintf(out, "%g\n", correlation);
        fflush(out);
        verboseTime(2, "correlated %g from %s and %s", correlation, firstName, secondName);

        metaWigClose(&secondWig);
        }
    metaWigClose(&firstWig);
    }
carefulClose(&out);
}
void splatToEland(char *inName, char *outName)
/* splatToEland - Convert from splat to eland format.
 * Loads every alignment from inName, sorts the list by read name when
 * needSort says it is necessary, then walks the list one run at a time
 * (run boundaries come from findNextDifferent) and writes one eland record
 * per run to outName.  The multi flag, declared outside this function,
 * selects between the multi and single output routines. */
{
verboseTimeInit();
struct splatAli *list = splatAliLoadAll(inName);
verboseTime(1, "Loaded %d sequences from %s", slCount(list), inName);
if (needSort(list))
    {
    slSort(&list, splatAliCmpReadName);
    verboseTime(1, "Sorted by read name");
    }
FILE *f = mustOpen(outName, "w");
struct splatAli *el, *next;
for (el = list; el != NULL; el = next)
    {
    /* findNextDifferent sets next to the element after the current run,
     * which is also where the following iteration starts. */
    int sameCount;
    findNextDifferent(el, &next, &sameCount);
    int bestScore, bestCount;
    splatAliLookForBest(el, next, &bestScore, &bestCount);
    int subCounts[3];
    countSubsInList(el, next, subCounts);
    if (multi)
        elandMultiOutput(el, next, bestScore, bestCount, subCounts, f);
    else
        elandSingleOutput(el, next, bestScore, bestCount, subCounts, f);
    }
/* Close the output so buffered data is flushed and the handle is released;
 * previously the stream was left open when the function returned. */
carefulClose(&f);
}
Example #3
0
int bbiWriteZoomLevels(
    struct lineFile *lf,    /* Input file. */
    FILE *f,		    /* Output. */
    int blockSize,	    /* Size of index block */
    int itemsPerSlot,	    /* Number of data points bundled at lowest level. */
    bbiWriteReducedOnceReturnReducedTwice writeReducedOnceReturnReducedTwice,   /* callback */
    int fieldCount,	    /* Number of fields in bed (4 for bedGraph) */
    boolean doCompress,	    /* Do we compress.  Answer really should be yes! */
    bits64 dataSize,	    /* Size of data on disk (after compression if any). */
    struct bbiChromUsage *usageList, /* Result from bbiChromUsageFromBedFile */
    int resTryCount, int resScales[], int resSizes[],   /* How much to zoom at each level */
    bits32 zoomAmounts[bbiMaxZoomLevels],      /* Fills in amount zoomed at each level. */
    bits64 zoomDataOffsets[bbiMaxZoomLevels],  /* Fills in where data starts for each zoom level. */
    bits64 zoomIndexOffsets[bbiMaxZoomLevels], /* Fills in where index starts for each level. */
    struct bbiSummaryElement *totalSum)
/* Write out all the zoom levels and return the number of levels written (always
 * at least one).  The actual zoom amounts and the file offsets of the zoomed data
 * and index are stored in zoomAmounts, zoomDataOffsets and zoomIndexOffsets;
 * totalSum is handed through to the callback.  Sorry for all the parameters - it
 * was this or duplicate a big chunk of code between bedToBigBed and
 * bedGraphToBigWig. */
{
/* Write out first zoomed section while storing in memory next zoom level. */
assert(resTryCount > 0);

/* Keep this 64 bits wide: dataSize is a bits64, and narrowing half of it to an
 * int would truncate or go negative for multi-gigabyte files, corrupting the
 * size comparison below. */
bits64 maxReducedSize = dataSize/2;
int initialReduction = 0, initialReducedCount = 0;

/* Figure out initialReduction for zoom - one that is maxReducedSize or less. */
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    {
    bits64 reducedSize = resSizes[resTry] * sizeof(struct bbiSummaryOnDisk);
    if (doCompress)
        reducedSize /= 2;	// Estimate!
    if (reducedSize <= maxReducedSize)
        {
        initialReduction = resScales[resTry];
        initialReducedCount = resSizes[resTry];
        break;
        }
    }
verbose(2, "initialReduction %d, initialReducedCount = %d\n", 
    initialReduction, initialReducedCount);

/* Force there to always be at least one zoom.  It may waste a little space on small
 * files, but it makes files more uniform, and avoids special case code for calculating
 * overall file summary. */
if (initialReduction == 0)
    {
    initialReduction = resScales[0];
    initialReducedCount = resSizes[0];
    }

/* Call routine to make the initial zoom level and also a bit of work towards further levels. */
struct lm *lm = lmInit(0);
int zoomIncrement = bbiResIncrement;
lineFileRewind(lf);
struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList, fieldCount,
        lf, initialReduction, initialReducedCount,
        zoomIncrement, blockSize, itemsPerSlot, doCompress, lm, 
        f, &zoomDataOffsets[0], &zoomIndexOffsets[0], totalSum);
verboseTime(2, "writeReducedOnceReturnReducedTwice");
zoomAmounts[0] = initialReduction;
int zoomLevels = 1;

/* Loop around to do any additional levels of zoom.  Stop when the level table is
 * full or when a further reduction no longer shrinks the summary list. */
int zoomCount = initialReducedCount;
int reduction = initialReduction * zoomIncrement;
while (zoomLevels < bbiMaxZoomLevels)
    {
    int rezoomCount = slCount(rezoomedList);
    if (rezoomCount >= zoomCount)
        break;
    zoomCount = rezoomCount;
    zoomDataOffsets[zoomLevels] = ftell(f);
    zoomIndexOffsets[zoomLevels] = bbiWriteSummaryAndIndex(rezoomedList, 
        blockSize, itemsPerSlot, doCompress, f);
    zoomAmounts[zoomLevels] = reduction;
    ++zoomLevels;
    reduction *= zoomIncrement;
    rezoomedList = bbiSummarySimpleReduce(rezoomedList, reduction, lm);
    }
/* lm is the pool handed to the callback and to bbiSummarySimpleReduce. */
lmCleanup(&lm);
verboseTime(2, "further reductions");
return zoomLevels;
}
void itsaMake(int inCount, char *inputs[], char *output)
/* itsaMake - Make a suffix array file out of input DNA sequences.
 * Reads every sequence from the inCount files in inputs, requires all sequence
 * names to be unique and the total size to stay within maxGenomeSize, packs the
 * DNA into one zero-separated buffer, runs the first indexing pass over each
 * sequence and hands everything to itsaWriteMerged to write output. */
{
verboseTimeInit();
bits64 maxGenomeSize = 1024LL*1024*1024*4;

itsaBaseToValInit();

/* Load all DNA, make sure names are unique, and alphabetize by name. */
struct dnaSeq *seqList = NULL, *seq;
struct hash *uniqSeqHash = hashNew(0);
bits64 totalDnaSize = 1;	/* For space between. */
int inputIx;
for (inputIx=0; inputIx<inCount; ++inputIx)
    {
    char * input = inputs[inputIx];
    struct dnaLoad *dl = dnaLoadOpen(input);
    while ((seq = dnaLoadNext(dl)) != NULL)
        {
        verbose(1, "read %s with %d bases\n", seq->name, seq->size);
        if (hashLookup(uniqSeqHash, seq->name))
            errAbort("Input sequence name %s repeated, all must be unique.", seq->name);
        /* Record the name; without this the lookup above could never hit and
         * duplicate names slipped through unnoticed. */
        hashAdd(uniqSeqHash, seq->name, NULL);
        totalDnaSize +=  seq->size + 1;
        if (totalDnaSize > maxGenomeSize)
            errAbort("Too much DNA. Can only handle up to %lld bases",
                (long long)maxGenomeSize);
        slAddHead(&seqList, seq);
        }
    dnaLoadClose(&dl);
    }
/* The hash is only needed for the uniqueness check during loading. */
freeHash(&uniqSeqHash);
slSort(&seqList, dnaSeqCmpName);
verboseTime(1, "Loaded %lld bases in %d sequences", (long long)totalDnaSize, slCount(seqList));

/* Allocate big buffer for all DNA. */
DNA *allDna = globalAllDna = needHugeMem(totalDnaSize);
allDna[0] = 0;
bits64 chromOffset = 1;	/* Have zeroes between each chrom, and before and after. */

/* Copy DNA to a single big buffer, and create chromInfo on each sequence. */
struct chromInfo *chrom, *chromList = NULL;
for (seq = seqList; seq != NULL; seq = seq->next)
    {
    AllocVar(chrom);
    chrom->name = cloneString(seq->name);
    chrom->size = seq->size;
    chrom->offset = chromOffset;
    slAddHead(&chromList, chrom);
    toUpperN(seq->dna, seq->size);
    /* size + 1 bytes are copied so the byte following the bases (expected to
     * be the terminating zero) becomes the separator. */
    memcpy(allDna + chromOffset, seq->dna, seq->size + 1);
    chromOffset += seq->size + 1;
    }
slReverse(&chromList);

/* Free up separate dna sequences because we're going to need a lot of RAM soon. */
dnaSeqFreeList(&seqList);

/* Allocate index array, and offset and list arrays. */
bits32 *index13;
AllocArray(index13, itsaSlotCount);
bits32 *offsetArray = needHugeMem(totalDnaSize * sizeof(bits32));
bits32 *listArray = needHugeZeroedMem(totalDnaSize * sizeof(bits32));
verboseTime(1, "Allocated buffers %lld bytes total", 
        (long long)(9LL*totalDnaSize + itsaSlotCount*sizeof(bits32)));

/* Where normally we'd keep some sort of structure with a next element to form a list
 * of matching positions in each slot of our index,  to conserve memory we'll do this
 * with two parallel arrays.  Because we're such cheapskates in terms of memory
 * (and still using 9*genomeSize bytes of RAM) we'll use these arrays for two different
 * purposes.   
 *     In the first phase they will together be used to form linked lists of
 * offsets, and the 13mer index will point to the first item in each list.  In this
 * phase the offsetArray contains offsets into the allDna structure, and the listArray
 * contains the next pointers for the list.  After the first phase we write out the
 * suffix array to disk.
 *     In the second phase we read the suffix array back into the offsetArray, and
 * use the listArray for the traverseArray.  We write out the traverse array to finish
 * things up. */

/* First indexing pass over each sequence in the big DNA buffer. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    verbose(2, "  About to do first pass index\n");
    indexChromPass1(chrom, allDna, offsetArray, listArray, index13);
    verbose(2, "  Done first pass index\n");
    }
verboseTime(1, "Done big bucket sort");

/* NOTE(review): chromList was already put into buffer order by the slReverse
 * after the copy loop, so this second reversal hands itsaWriteMerged the list
 * in the opposite order to the DNA layout in allDna.  Left unchanged here -
 * confirm against the itsa reader whether this is intended. */
slReverse(&chromList);
itsaWriteMerged(chromList, allDna, offsetArray, listArray, index13, output);
}
static void itsaWriteMerged(struct chromInfo *chromList, DNA *allDna,
	bits32 *offsetArray, bits32 *listArray, bits32 *index13, char *output)
/* Write the complete output file: header, chromosome names, chromosome sizes,
 * the DNA buffer, the suffix array, the traverse array, the 13-mer index and
 * one cursor byte per index slot.  The header is written twice - once up front
 * as a placeholder and once at the very end with the magic number filled in.
 * As a side effect will replace offsetArray with suffix array and listArray
 * with traverse array, and rewrites index13 in place to hold 1-based suffix
 * array positions (0 meaning the slot is empty). */
{
/* "w+" because the suffix array is read back from this same file below. */
FILE *f = mustOpen(output, "w+");

/** Allocate header and fill out easy constant fields. */
struct itsaFileHeader *header;
AllocVar(header);
header->majorVersion = ITSA_MAJOR_VERSION;
header->minorVersion = ITSA_MINOR_VERSION;

/* Figure out sizes of names and sequence for each chromosome. */
struct chromInfo *chrom;
bits32 chromNamesSize = 0;
bits64 dnaDiskSize = 1;	/* For initial zero. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
   {
   chromNamesSize += strlen(chrom->name) + 1;
   dnaDiskSize += chrom->size + 1;  /* Include separating zeroes. */
   }

/* Fill in  most of rest of header fields */
header->chromCount = slCount(chromList);
header->chromNamesSize = roundUpTo4(chromNamesSize);
header->dnaDiskSize = roundUpTo4(dnaDiskSize);
bits32 chromSizesSize = header->chromCount*sizeof(bits32);

/* Write header.  magic, arraySize and size are not set yet at this point
 * (presumably zero from AllocVar - confirm); they are filled in by the rewrite
 * at the end of this function. */
mustWrite(f, header, sizeof(*header));

/* Write chromosome names. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    mustWrite(f, chrom->name, strlen(chrom->name)+1);
zeroPad(f, header->chromNamesSize - chromNamesSize);

/* Write chromosome sizes. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    mustWrite(f, &chrom->size, sizeof(chrom->size));
/* NOTE(review): chromSizesSize was computed above as exactly
 * chromCount*sizeof(bits32), so this pad is always zero. */
int chromSizesSizePad = chromSizesSize - header->chromCount * sizeof(bits32);
zeroPad(f, chromSizesSizePad);

/* Write out chromosome DNA and zeros before, between, and after. */
mustWrite(f, allDna, dnaDiskSize);
zeroPad(f, header->dnaDiskSize - dnaDiskSize);
verboseTime(1, "Wrote %lld bases of DNA including zero padding", header->dnaDiskSize);

/* Calculate and write suffix array. Convert index13 to index of array as opposed to index
 * of sequence. */
bits64 arraySize = 0;
/* Remember where the suffix array starts so it can be read back below. */
off_t suffixArrayFileOffset = ftello(f);
int slotCount = itsaSlotCount;
int slotIx;
for (slotIx=0; slotIx < slotCount; ++slotIx)
    {
    int slotSize = finishAndWriteOneSlot(offsetArray, listArray, index13[slotIx], allDna, f);
    /* Convert index13 to hold the position in the suffix array where the first thing matching
     * the corresponding 13-base prefix is found. */
    if (slotSize != 0)
        index13[slotIx] = arraySize+1;	/* The +1 is so we can keep 0 for not found. */
    else
        index13[slotIx] = 0;
    arraySize += slotSize;
    /* Progress reporting: a dot every 200000 slots, a line every 10000000. */
    if ((slotIx % 200000 == 0) && slotIx != 0)
	{
	verboseDot();
	if (slotIx % 10000000 == 0)
	    verbose(1, "fine sort bucket %d of %d\n", slotIx, slotCount);
	}
    }
verbose(1, "fine sort bucket %d of %d\n", slotCount, slotCount);
verboseTime(1, "Wrote %lld suffix array positions", arraySize);

/* Now we're done with the offsetArray and listArray buffers, so use them for the
 * next phase. */
bits32 *suffixArray = offsetArray;
offsetArray = NULL;	/* Help make some errors more obvious */
bits32 *traverseArray = listArray;
listArray = NULL;	/* Help make some errors more obvious */

/* Read the suffix array back from the file. */
fseeko(f, suffixArrayFileOffset, SEEK_SET);
mustRead(f, suffixArray, arraySize*sizeof(bits32));
verboseTime(1, "Read suffix array back in");

/* Calculate traverse array and cursor arrays */
memset(traverseArray, 0, arraySize*sizeof(bits32));
UBYTE *cursorArray = needHugeMem(arraySize);
itsaFillInTraverseArray(allDna, suffixArray, arraySize, traverseArray, cursorArray);
verboseTime(1, "Filled in traverseArray");

/* Write out traverse array.  The read above consumed exactly the suffix array,
 * which was the last thing written, so no repositioning is done before this write. */
mustWrite(f, traverseArray, arraySize*sizeof(bits32));
verboseTime(1, "Wrote out traverseArray");

/* Write out 13-mer index. */
mustWrite(f, index13, itsaSlotCount*sizeof(bits32));
verboseTime(1, "Wrote out index13");

/* Write out bits of cursor array corresponding to index. */
for (slotIx=0; slotIx<itsaSlotCount; ++slotIx)
    {
    bits32 indexPos = index13[slotIx];
    /* index13 entries are 1-based; 0 marks an empty slot and gets a zero cursor. */
    if (indexPos == 0)
       fputc(0, f);
    else
       fputc(cursorArray[indexPos-1], f);
    }
verboseTime(1, "Wrote out cursors13");

/* Update a few fields in header, and go back and write it out again with
 * the correct magic number to indicate it's complete.
 * NOTE(review): the chromosome names line below ends in '+' and the next line
 * begins with '+'; the second one parses as a unary plus, so the sum is
 * unaffected, but it is presumably a typo.
 * NOTE(review): the chromosome sizes term assumes each size written above
 * occupies sizeof(bits32) bytes - verify against the type of chrom->size. */
header->magic = ITSA_MAGIC;
header->arraySize = arraySize;
header->size = sizeof(*header) 			// header
	+ header->chromNamesSize + 		// chromosome names
	+ header->chromCount * sizeof(bits32)	// chromosome sizes
	+ header->dnaDiskSize 			// dna sequence
	+ sizeof(bits32) * arraySize	 	// suffix array
	+ sizeof(bits32) * arraySize   		// traverse array
	+ sizeof(bits32) * itsaSlotCount 	// index13
	+ sizeof(UBYTE) * itsaSlotCount;	// cursors13

rewind(f);
mustWrite(f, header, sizeof(*header));
carefulClose(&f);
verbose(1, "Completed %s is %lld bytes\n", output, header->size);
}
Example #6
0
void bbFileCreate(
	char *inName, 	  /* Input file in a tabular bed format <chrom><start><end> + whatever. */
	char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */
	int blockSize,	  /* Number of items to bundle in r-tree.  1024 is good. */
	int itemsPerSlot, /* Number of items in lowest level of tree.  64 is good. */
	char *asText,	  /* Field definitions in a string */
	struct asObject *as,  /* Field definitions parsed out */
	boolean doCompress, /* If TRUE then compress data. */
	struct slName *extraIndexList,	/* List of extra indexes to add */
	char *outName)    /* BigBed output file name. */
/* Convert tab-separated bed file to binary indexed, zoomed bigBed version.
 * The output is laid out with placeholder header sections first; the real data,
 * indexes and zoom levels follow, and then the code seeks back to overwrite the
 * placeholders with the offsets collected along the way.
 * Reads sizesIs2Bit, tabSep and bedN and writes maxBedNameSize, none of which
 * are declared in this function (presumably file-scope settings - confirm).
 * NOTE(review): as is passed to asObjectFreeList during clean up, so the caller
 * should not use it after this call - confirm this ownership is intended. */
{
/* Set up timing measures. */
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);

bits16 fieldCount = slCount(as->columnList);
bits16 extraIndexCount = slCount(extraIndexList);

/* eim stays NULL when no extra indexes were requested. */
struct bbExIndexMaker *eim = NULL;
if (extraIndexList != NULL)
    eim = bbExIndexMakerNew(extraIndexList, as);

/* Load in chromosome sizes. */
struct hash *chromSizesHash = NULL;

if (sizesIs2Bit)
    chromSizesHash = twoBitChromHash(chromSizes);
else
    chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "Read %d chromosomes and sizes from %s\n",  chromSizesHash->elCount, chromSizes);

/* Do first pass, mostly just scanning file and counting hits per chromosome. */
int minDiff = 0;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, eim, 
    &minDiff, &aveSize, &bedCount, tabSep);
verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList));
verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize);

/* Open output file and write dummy header. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);

/* Write out autoSql string */
bits64 asOffset = ftell(f);
mustWrite(f, asText, strlen(asText) + 1);
verbose(2, "as definition has %d columns\n", fieldCount);

/* Write out dummy total summary.  The real values are written over it near
 * the end, once totalSum has been filled in. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write out dummy header extension */
bits64 extHeaderOffset = ftell(f);
bits16 extHeaderSize = 64;
repeatCharOut(f, 0, extHeaderSize);

/* Write out extra index stuff if need be. */
bits64 extraIndexListOffset = 0;
bits64 extraIndexListEndOffset = 0;
if (extraIndexList != NULL)
    {
    /* Reserve zeroed space for one record per extra index; the records are
     * filled in after the indexes themselves have been written. */
    extraIndexListOffset = ftell(f);
    int extraIndexSize = 16 + 4*1;   // Fixed record size 16, plus 1 times field size of 4 
    repeatCharOut(f, 0, extraIndexSize*extraIndexCount);
    extraIndexListEndOffset = ftell(f);
    }

/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);

/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Write out primary full resolution data in sections, collect stats to use for reductions. */
bits64 dataOffset = ftell(f);
bits32 blockCount = 0;
bits32 maxBlockSize = 0;
struct bbiBoundsArray *boundsArray = NULL;
writeOne(f, bedCount);
/* With no records, boundsArray stays NULL and blockCount stays 0. */
if (bedCount > 0)
    {
    blockCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
    AllocArray(boundsArray, blockCount);
    lineFileRewind(lf);
    if (eim)
	bbExIndexMakerAllocChunkArrays(eim, bedCount);
    writeBlocks(usageList, lf, as, itemsPerSlot, boundsArray, blockCount, doCompress,
	    f, resTryCount, resScales, resSizes, eim, bedCount, fieldCount, &maxBlockSize);
    }
verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)", 
	(long long)bedCount, fieldCount);

/* Write out primary data index.  This is called even when bedCount is zero,
 * in which case it receives a NULL array and a count of 0. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
    blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, 
    indexOffset, f);
freez(&boundsArray);
verboseTime(2, "index write");

/* Declare arrays and vars that track the zoom levels we actually output. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];

/* Call monster zoom maker library function that bedGraphToBigWig also uses.
 * indexOffset - dataOffset is the size on disk of the primary data section. */
int zoomLevels = 0;
if (bedCount > 0)
    {
    zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
	bedWriteReducedOnceReturnReducedTwice, fieldCount,
	doCompress, indexOffset - dataOffset, 
	usageList, resTryCount, resScales, resSizes, 
	zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
    }

/* Write out extra indexes if need be. */
if (eim)
    {
    int i;
    for (i=0; i < eim->indexCount; ++i)
        {
	/* Record where this index starts, sort its chunks by name, then write
	 * the index at the current file position. */
	eim->fileOffsets[i] = ftell(f);
	/* maxBedNameSize is set before sorting; presumably the comparison and
	 * key callbacks read it - confirm. */
	maxBedNameSize = eim->maxFieldSize[i];
	qsort(eim->chunkArrayArray[i], bedCount, 
	    sizeof(struct bbNamedFileChunk), bbNamedFileChunkCmpByName);
	assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0]));
	bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]), 
	    bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize, bbNamedFileChunkVal, 
	    sizeof(bits64) + sizeof(bits64), f);
	verboseTime(1, "Sorting and writing extra index %d", i);
	}
    }

/* Figure out buffer size needed for uncompression if need be. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxBlockSize, maxZoomUncompSize);
    }

/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigBedSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits32 reserved32 = 0;
bits64 reserved64 = 0;

bits16 definedFieldCount = bedN;

/* Write fixed header; the assert below checks it comes to exactly 64 bytes. */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, fieldCount);
writeOne(f, definedFieldCount);
writeOne(f, asOffset);
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, extHeaderOffset);
assert(ftell(f) == 64);

/* Write summary headers with data. */
int i;
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i<zoomLevels; ++i)
    {
    verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]);
    writeOne(f, zoomAmounts[i]);
    writeOne(f, reserved32);
    writeOne(f, zoomDataOffsets[i]);
    writeOne(f, zoomIndexOffsets[i]);
    }
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i<bbiMaxZoomLevels; ++i)
    {
    writeOne(f, reserved32);
    writeOne(f, reserved32);
    writeOne(f, reserved64);
    writeOne(f, reserved64);
    }

/* Write total summary. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);

/* Write extended header; the assert checks that the three fields plus the
 * 52 reserved bytes add up to extHeaderSize. */
fseek(f, extHeaderOffset, SEEK_SET);
writeOne(f, extHeaderSize);
writeOne(f, extraIndexCount);
writeOne(f, extraIndexListOffset);
repeatCharOut(f, 0, 52);    // reserved
assert(ftell(f) - extHeaderOffset == extHeaderSize);

/* Write extra index offsets if need be. */
if (extraIndexCount != 0)
    {
    fseek(f, extraIndexListOffset, SEEK_SET);
    int i;
    for (i=0; i<extraIndexCount; ++i)
        {
	// Write out fixed part of index info
	bits16 type = 0;    // bPlusTree type
	bits16 indexFieldCount = 1;
	writeOne(f, type);
	writeOne(f, indexFieldCount);
	writeOne(f, eim->fileOffsets[i]);
	repeatCharOut(f, 0, 4);  // reserved

	// Write out field list - easy this time because for now always only one field.
	bits16 fieldId = eim->indexFields[i];
	writeOne(f, fieldId);
	repeatCharOut(f, 0, 2); // reserved
	}
    /* Checks that the records exactly fill the space reserved earlier. */
    assert(ftell(f) == extraIndexListEndOffset);
    }

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);


/* Clean up. */
lineFileClose(&lf);
carefulClose(&f);
freeHash(&chromSizesHash);
bbiChromUsageFreeList(&usageList);
asObjectFreeList(&as);
}
Example #7
0
void bedGraphToBigWig(char *inName, char *chromSizes, char *outName)
/* bedGraphToBigWig - Convert a bedGraph program to bigWig.. */
{
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);
struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "%d chroms in %s\n", chromSizesHash->elCount, chromSizes);
int minDiff = 0, i;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, NULL, 
    &minDiff, &aveSize, &bedCount);
verboseTime(2, "pass1");
verbose(2, "%d chroms in %s, minDiff=%d, aveSize=%g, bedCount=%lld\n", 
    slCount(usageList), inName, minDiff, aveSize, bedCount);

/* Write out dummy header, zoom offsets. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);

/* Write out dummy total summary. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);

/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);

/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);

/* Write out primary full resolution data in sections, collect stats to use for reductions. */
bits64 dataOffset = ftell(f);
bits64 sectionCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
writeOne(f, sectionCount);
struct bbiBoundsArray *boundsArray;
AllocArray(boundsArray, sectionCount);
lineFileRewind(lf);
bits32 maxSectionSize = 0;
writeSections(usageList, lf, itemsPerSlot, boundsArray, sectionCount, f,
	resTryCount, resScales, resSizes, doCompress, &maxSectionSize);
verboseTime(2, "pass2");

/* Write out primary data index. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), sectionCount,
    blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, 
    indexOffset, f);
verboseTime(2, "index write");

/* Declare arrays and vars that track the zoom levels we actually output. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];

/* Call monster zoom maker library function that bedToBigBed also uses. */
int zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
    bedGraphWriteReducedOnceReturnReducedTwice, 4,
    doCompress, indexOffset - dataOffset, 
    usageList, resTryCount, resScales, resSizes, 
    zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);


/* Figure out buffer size needed for uncompression if need be. */
if (doCompress)
    {
    int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
    uncompressBufSize = max(maxSectionSize, maxZoomUncompSize);
    }

/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;

/* Write fixed header */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, reserved16);	// fieldCount
writeOne(f, reserved16);	// definedFieldCount
writeOne(f, reserved64);	// autoSqlOffset
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64);	// nameIndexOffset
assert(ftell(f) == 64);

/* Write summary headers with data. */
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i<zoomLevels; ++i)
    {
    verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]);
    writeOne(f, zoomAmounts[i]);
    writeOne(f, reserved32);
    writeOne(f, zoomDataOffsets[i]);
    writeOne(f, zoomIndexOffsets[i]);
    }
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i<bbiMaxZoomLevels; ++i)
    {
    writeOne(f, reserved32);
    writeOne(f, reserved32);
    writeOne(f, reserved64);
    writeOne(f, reserved64);
    }

/* Write total summary. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);

/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);

lineFileClose(&lf);
carefulClose(&f);
}