Exemple #1
0
static bits64 bbiWriteSummaryAndIndexComp(struct bbiSummary *summaryList, 
	int blockSize, int itemsPerSlot, FILE *f)
/* Write out summary and index to summary uncompressed, returning start position of
 * summary index. */
{
bits32 i, count = slCount(summaryList);
struct bbiSummary **summaryArray;
AllocArray(summaryArray, count);
writeOne(f, count);
struct bbiSummary *summary = summaryList;

/* Figure out max size of uncompressed and compressed blocks. */
bits32 itemSize = sizeof(summary->chromId) + sizeof(summary->start) + sizeof(summary->end) + sizeof(summary->validCount) + 4*sizeof(float);
int uncBufSize = itemSize * itemsPerSlot;
char uncBuf[uncBufSize];
int compBufSize = zCompBufSize(uncBufSize);
char compBuf[compBufSize];

/* Loop through compressing and writing one slot at a time. */
bits32 itemsLeft = count;
int sumIx = 0;
while (itemsLeft > 0)
    {
    bits32 itemsInSlot = itemsLeft;
    if (itemsInSlot > itemsPerSlot)
         itemsInSlot = itemsPerSlot;
    char *writePt = uncBuf;

    bits64 filePos = ftell(f);
    for (i=0; i<itemsInSlot; ++i)
        {
	summaryArray[sumIx++] = summary;
	memWriteOne(&writePt, summary->chromId);
	memWriteOne(&writePt, summary->start);
	memWriteOne(&writePt, summary->end);
	memWriteOne(&writePt, summary->validCount);
	memWriteFloat(&writePt, summary->minVal);
	memWriteFloat(&writePt, summary->maxVal);
	memWriteFloat(&writePt, summary->sumData);
	memWriteFloat(&writePt, summary->sumSquares);
	summary->fileOffset = filePos;
	summary = summary->next;
	if (summary == NULL)
	    break;
	}

    bits32 uncSize = writePt - uncBuf;
    int compSize = zCompress(uncBuf, uncSize, compBuf, compBufSize);
    mustWrite(f, compBuf, compSize);

    itemsLeft -= itemsInSlot;
    }
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(summaryArray, sizeof(summaryArray[0]), count,
    blockSize, itemsPerSlot, NULL, bbiSummaryFetchKey, bbiSummaryFetchOffset, 
    indexOffset, f);
freez(&summaryArray);
return indexOffset;
}
Exemple #2
0
void bbiSumOutStreamFlush(struct bbiSumOutStream *stream)
/* Flush out any pending input. */
{
if (stream->elCount != 0)
    {
    int uncSize = stream->elCount * sizeof(stream->array[0]);
    if (stream->doCompress)
        {
	int compBufSize = zCompBufSize(uncSize);
	char compBuf[compBufSize];
	int compSize = zCompress(stream->array, uncSize, compBuf, compBufSize);
	mustWrite(stream->f, compBuf, compSize);
	}
    else
        {
	mustWrite(stream->f, stream->array, uncSize);
	}
    stream->elCount = 0;
    }
}
Exemple #3
0
static int bwgSectionWrite(struct bwgSection *section, boolean doCompress, FILE *f)
/* Write out section to file, filling in section->fileOffset. */
{
UBYTE type = section->type;
UBYTE reserved8 = 0;
int itemSize;
switch (section->type)
    {
    case bwgTypeBedGraph:
        itemSize = 12;
	break;
    case bwgTypeVariableStep:
        itemSize = 8;
	break;
    case bwgTypeFixedStep:
        itemSize = 4;
	break;
    default:
        itemSize = 0;  // Suppress compiler warning
	internalErr();
	break;
    }
int fixedSize = sizeof(section->chromId) + sizeof(section->start) + sizeof(section->end) + 
     sizeof(section->itemStep) + sizeof(section->itemSpan) + sizeof(type) + sizeof(reserved8) +
     sizeof(section->itemCount);
int bufSize = section->itemCount * itemSize + fixedSize;
char buf[bufSize];
char *bufPt = buf;

section->fileOffset = ftell(f);
memWriteOne(&bufPt, section->chromId);
memWriteOne(&bufPt, section->start);
memWriteOne(&bufPt, section->end);
memWriteOne(&bufPt, section->itemStep);
memWriteOne(&bufPt, section->itemSpan);
memWriteOne(&bufPt, type);
memWriteOne(&bufPt, reserved8);
memWriteOne(&bufPt, section->itemCount);

int i;
switch (section->type)
    {
    case bwgTypeBedGraph:
	{
	struct bwgBedGraphItem *item = section->items.bedGraphList;
	for (item = section->items.bedGraphList; item != NULL; item = item->next)
	    {
	    memWriteOne(&bufPt, item->start);
	    memWriteOne(&bufPt, item->end);
	    memWriteOne(&bufPt, item->val);
	    }
	break;
	}
    case bwgTypeVariableStep:
	{
	struct bwgVariableStepPacked *items = section->items.variableStepPacked;
	for (i=0; i<section->itemCount; ++i)
	    {
	    memWriteOne(&bufPt, items->start);
	    memWriteOne(&bufPt, items->val);
	    items += 1;
	    }
	break;
	}
    case bwgTypeFixedStep:
	{
	struct bwgFixedStepPacked *items = section->items.fixedStepPacked;
	for (i=0; i<section->itemCount; ++i)
	    {
	    memWriteOne(&bufPt, items->val);
	    items += 1;
	    }
	break;
	}
    default:
        internalErr();
	break;
    }
assert(bufSize == (bufPt - buf) );

if (doCompress)
    {
    size_t maxCompSize = zCompBufSize(bufSize);
    char compBuf[maxCompSize];
    int compSize = zCompress(buf, bufSize, compBuf, maxCompSize);
    mustWrite(f, compBuf, compSize);
    }
else
    mustWrite(f, buf, bufSize);
return bufSize;
}
Exemple #4
0
static void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as, 
	int itemsPerSlot, struct bbiBoundsArray *bounds, 
	int sectionCount, boolean doCompress, FILE *f, 
	int resTryCount, int resScales[], int resSizes[], 
	struct bbExIndexMaker *eim,  int bedCount,
	bits16 fieldCount, bits32 *retMaxBlockSize)
/* Read through lf, writing it in f.  Save starting points of blocks (every itemsPerSlot)
 * to boundsArray */
{
int maxBlockSize = 0;
struct bbiChromUsage *usage = usageList;
char *line, *row[fieldCount+1];
int lastField = fieldCount-1;
int itemIx = 0, sectionIx = 0;
bits64 blockStartOffset = 0;
int startPos = 0, endPos = 0;
bits32 chromId = 0;
struct dyString *stream = dyStringNew(0);

/* Will keep track of some things that help us determine how much to reduce. */
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
boolean atEnd = FALSE, sameChrom = FALSE;
bits32 start = 0, end = 0;
char *chrom = NULL;
struct bed *bed;
AllocVar(bed);

/* Help keep track of which beds are in current chunk so as to write out
 * namedChunks to eim if need be. */
long sectionStartIx = 0, sectionEndIx = 0;

for (;;)
    {
    /* Get next line of input if any. */
    if (lineFileNextReal(lf, &line))
	{
	/* Chop up line and make sure the word count is right. */
	int wordCount;
	if (tabSep)
	    wordCount = chopTabs(line, row);
	else
	    wordCount = chopLine(line, row);
	lineFileExpectWords(lf, fieldCount, wordCount);

	loadAndValidateBed(row, bedN, fieldCount, lf, bed, as, FALSE);

	chrom = bed->chrom;
	start = bed->chromStart;
	end = bed->chromEnd;

	sameChrom = sameString(chrom, usage->name);
	}
    else  /* No next line */
	{
	atEnd = TRUE;
	}


    /* Check conditions that would end block and save block info and advance to next if need be. */
    if (atEnd || !sameChrom || itemIx >= itemsPerSlot)
        {
	/* Save stream to file, compressing if need be. */
	if (stream->stringSize > maxBlockSize)
	    maxBlockSize = stream->stringSize;
	if (doCompress)
            {
	    size_t maxCompSize = zCompBufSize(stream->stringSize);

            // keep around an area of scratch memory
            static int compBufSize = 0;
            static char *compBuf = NULL;
            // check to see if buffer needed for compression is big enough
            if (compBufSize < maxCompSize)
                {
                // free up the old not-big-enough piece
                freez(&compBuf); // freez knows bout NULL

                // get new scratch area
                compBufSize = maxCompSize;
                compBuf = needLargeMem(compBufSize);
                }

	    int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
	    mustWrite(f, compBuf, compSize);
	    }
	else
	    mustWrite(f, stream->string, stream->stringSize);
	dyStringClear(stream);

	/* Save block offset and size for all named chunks in this section. */
	if (eim != NULL)
	    {
	    bits64 blockEndOffset = ftell(f);
	    bbExIndexMakerAddOffsetSize(eim, blockStartOffset, blockEndOffset-blockStartOffset,
		sectionStartIx, sectionEndIx);
	    sectionStartIx = sectionEndIx;
	    }

	/* Save info on existing block. */
	struct bbiBoundsArray *b = &bounds[sectionIx];
	b->offset = blockStartOffset;
	b->range.chromIx = chromId;
	b->range.start = startPos;
	b->range.end = endPos;
	++sectionIx;
	itemIx = 0;

	if (atEnd)
	    break;
	}

    /* Advance to next chromosome if need be and get chromosome id. */
    if (!sameChrom)
        {
	usage = usage->next;
	assert(usage != NULL);
	assert(sameString(chrom, usage->name));
	for (resTry = 0; resTry < resTryCount; ++resTry)
	    resEnds[resTry] = 0;
	}
    chromId = usage->id;

    /* At start of block we save a lot of info. */
    if (itemIx == 0)
        {
	blockStartOffset = ftell(f);
	startPos = start;
	endPos = end;
	}
    /* Otherwise just update end. */
        {
	if (endPos < end)
	    endPos = end;
	/* No need to update startPos since list is sorted. */
	}

    /* Save name into namedOffset if need be. */
    if (eim != NULL)
	{
	bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx);
	sectionEndIx += 1;
	}

    /* Write out data. */
    dyStringWriteOne(stream, chromId);
    dyStringWriteOne(stream, start);
    dyStringWriteOne(stream, end);
    if (fieldCount > 3)
        {
	int i;
	/* Write 3rd through next to last field and a tab separator. */
	for (i=3; i<lastField; ++i)
	    {
	    char *s = row[i];
	    dyStringAppend(stream, s);
	    dyStringAppendC(stream, '\t');
	    }
	/* Write last field and terminal zero */
	char *s = row[lastField];
	dyStringAppend(stream, s);
	}
    dyStringAppendC(stream, 0);

    itemIx += 1;

    /* Do zoom counting. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
	bits32 resEnd = resEnds[resTry];
	if (start >= resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = start + resScales[resTry];
	    }
	while (end > resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = resEnd + resScales[resTry];
	    }
	}
    }
assert(sectionIx == sectionCount);
freez(&bed);
*retMaxBlockSize = maxBlockSize;
}
Exemple #5
0
void writeSections(struct bbiChromUsage *usageList, struct lineFile *lf, 
	int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, FILE *f,
	int resTryCount, int resScales[], int resSizes[], 
	boolean doCompress, bits32 *retMaxSectionSize)
/* Read through lf, chunking it into sections that get written to f.  Save info
 * about sections in bounds. */
{
int maxSectionSize = 0;
struct bbiChromUsage *usage = usageList;
int itemIx = 0, sectionIx = 0;
bits32 reserved32 = 0;
UBYTE reserved8 = 0;
struct sectionItem items[itemsPerSlot];
struct sectionItem *lastB = NULL;
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
struct dyString *stream = dyStringNew(0);

/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);

for (;;)
    {
    /* Get next line of input if any. */
    char *row[5];
    int rowSize = lineFileChopNext(lf, row, ArraySize(row));

    /* Figure out whether need to output section. */
    boolean sameChrom = FALSE;
    if (rowSize > 0)
	sameChrom = sameString(row[0], usage->name);
    if (itemIx >= itemsPerSlot || rowSize == 0 || !sameChrom)
        {
	/* Figure out section position. */
	bits32 chromId = usage->id;
	bits32 sectionStart = items[0].start;
	bits32 sectionEnd = items[itemIx-1].end;

	/* Save section info for indexing. */
	assert(sectionIx < sectionCount);
	struct bbiBoundsArray *section = &bounds[sectionIx++];
	section->offset = ftell(f);
	section->range.chromIx = chromId;
	section->range.start = sectionStart;
	section->range.end = sectionEnd;

	/* Output section header to stream. */
	dyStringClear(stream);
	UBYTE type = bwgTypeBedGraph;
	bits16 itemCount = itemIx;
	dyStringWriteOne(stream, chromId);			// chromId
	dyStringWriteOne(stream, sectionStart);		// start
	dyStringWriteOne(stream, sectionEnd);	// end
	dyStringWriteOne(stream, reserved32);		// itemStep
	dyStringWriteOne(stream, reserved32);		// itemSpan
	dyStringWriteOne(stream, type);			// type
	dyStringWriteOne(stream, reserved8);			// reserved
	dyStringWriteOne(stream, itemCount);			// itemCount

	/* Output each item in section to stream. */
	int i;
	for (i=0; i<itemIx; ++i)
	    {
	    struct sectionItem *item = &items[i];
	    dyStringWriteOne(stream, item->start);
	    dyStringWriteOne(stream, item->end);
	    dyStringWriteOne(stream, item->val);
	    }

	/* Save stream to file, compressing if need be. */
	if (stream->stringSize > maxSectionSize)
	    maxSectionSize = stream->stringSize;
	if (doCompress)
	    {
	    size_t maxCompSize = zCompBufSize(stream->stringSize);
	    char compBuf[maxCompSize];
	    int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
	    mustWrite(f, compBuf, compSize);
	    }
	else
	    mustWrite(f, stream->string, stream->stringSize);


	/* If at end of input we are done. */
	if (rowSize == 0)
	    break;

	/* Set up for next section. */
	itemIx = 0;

	if (!sameChrom)
	    {
	    usage = usage->next;
	    assert(usage != NULL);
            if (!sameString(row[0], usage->name))
                errAbort("read %s, expecting %s on line %d in file %s\n", 
                    row[0], usage->name, lf->lineIx, lf->fileName);
	    assert(sameString(row[0], usage->name));
	    lastB = NULL;
	    for (resTry = 0; resTry < resTryCount; ++resTry)
		resEnds[resTry] = 0;
	    }
	}

    /* Parse out input. */
    lineFileExpectWords(lf, 4, rowSize);
    bits32 start = lineFileNeedNum(lf, row, 1);
    bits32 end = lineFileNeedNum(lf, row, 2);
    float val = lineFileNeedDouble(lf, row, 3);

    /* Verify that inputs meets our assumption - that it is a sorted bedGraph file. */
    if (start > end)
        errAbort("Start (%u) after end (%u) line %d of %s", start, end, lf->lineIx, lf->fileName);
    if (lastB != NULL)
        {
	if (lastB->start > start)
	    errAbort("BedGraph not sorted on start line %d of %s", lf->lineIx, lf->fileName);
	if (lastB->end > start)
	    errAbort("Overlapping regions in bedGraph line %d of %s", lf->lineIx, lf->fileName);
	}


    /* Do zoom counting. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
	bits32 resEnd = resEnds[resTry];
	if (start >= resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = start + resScales[resTry];
	    }
	while (end > resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = resEnd + resScales[resTry];
	    }
	}

    /* Save values in output array. */
    struct sectionItem *b = &items[itemIx];
    b->start = start;
    b->end = end;
    b->val = val;
    lastB = b;
    itemIx += 1;
    }
assert(sectionIx == sectionCount);

*retMaxSectionSize = maxSectionSize;
}