static bits64 bbiWriteSummaryAndIndexComp(struct bbiSummary *summaryList, int blockSize, int itemsPerSlot, FILE *f) /* Write out summary and index to summary uncompressed, returning start position of * summary index. */ { bits32 i, count = slCount(summaryList); struct bbiSummary **summaryArray; AllocArray(summaryArray, count); writeOne(f, count); struct bbiSummary *summary = summaryList; /* Figure out max size of uncompressed and compressed blocks. */ bits32 itemSize = sizeof(summary->chromId) + sizeof(summary->start) + sizeof(summary->end) + sizeof(summary->validCount) + 4*sizeof(float); int uncBufSize = itemSize * itemsPerSlot; char uncBuf[uncBufSize]; int compBufSize = zCompBufSize(uncBufSize); char compBuf[compBufSize]; /* Loop through compressing and writing one slot at a time. */ bits32 itemsLeft = count; int sumIx = 0; while (itemsLeft > 0) { bits32 itemsInSlot = itemsLeft; if (itemsInSlot > itemsPerSlot) itemsInSlot = itemsPerSlot; char *writePt = uncBuf; bits64 filePos = ftell(f); for (i=0; i<itemsInSlot; ++i) { summaryArray[sumIx++] = summary; memWriteOne(&writePt, summary->chromId); memWriteOne(&writePt, summary->start); memWriteOne(&writePt, summary->end); memWriteOne(&writePt, summary->validCount); memWriteFloat(&writePt, summary->minVal); memWriteFloat(&writePt, summary->maxVal); memWriteFloat(&writePt, summary->sumData); memWriteFloat(&writePt, summary->sumSquares); summary->fileOffset = filePos; summary = summary->next; if (summary == NULL) break; } bits32 uncSize = writePt - uncBuf; int compSize = zCompress(uncBuf, uncSize, compBuf, compBufSize); mustWrite(f, compBuf, compSize); itemsLeft -= itemsInSlot; } bits64 indexOffset = ftell(f); cirTreeFileBulkIndexToOpenFile(summaryArray, sizeof(summaryArray[0]), count, blockSize, itemsPerSlot, NULL, bbiSummaryFetchKey, bbiSummaryFetchOffset, indexOffset, f); freez(&summaryArray); return indexOffset; }
void bbiSumOutStreamFlush(struct bbiSumOutStream *stream)
/* Write any elements buffered in stream->array to stream->f, compressed when
 * stream->doCompress is set, and mark the buffer empty.  Does nothing if no
 * elements are pending. */
{
if (stream->elCount == 0)
    return;

int rawSize = stream->elCount * sizeof(stream->array[0]);
if (!stream->doCompress)
    mustWrite(stream->f, stream->array, rawSize);
else
    {
    /* Scratch buffer sized by zCompBufSize for the compressed output. */
    int compBufSize = zCompBufSize(rawSize);
    char compBuf[compBufSize];
    int compSize = zCompress(stream->array, rawSize, compBuf, compBufSize);
    mustWrite(stream->f, compBuf, compSize);
    }
stream->elCount = 0;
}
static int bwgSectionWrite(struct bwgSection *section, boolean doCompress, FILE *f)
/* Serialize section into a buffer - fixed header followed by its items - and
 * write it to f, compressed if doCompress is set.  Stores the position in f
 * where the section starts in section->fileOffset.  Returns the uncompressed
 * size of the section in bytes. */
{
UBYTE type = section->type;
UBYTE reserved8 = 0;

/* Bytes taken by a single serialized item of this section type. */
int itemSize;
if (section->type == bwgTypeBedGraph)
    itemSize = 12;
else if (section->type == bwgTypeVariableStep)
    itemSize = 8;
else if (section->type == bwgTypeFixedStep)
    itemSize = 4;
else
    {
    itemSize = 0;  // Suppress compiler warning
    internalErr();
    }

/* The buffer holds the fixed-size header plus itemCount items. */
int headerSize = sizeof(section->chromId) + sizeof(section->start) + sizeof(section->end)
        + sizeof(section->itemStep) + sizeof(section->itemSpan)
        + sizeof(type) + sizeof(reserved8) + sizeof(section->itemCount);
int bufSize = section->itemCount * itemSize + headerSize;
char buf[bufSize];
char *pt = buf;

section->fileOffset = ftell(f);

/* Header fields, in on-disk order. */
memWriteOne(&pt, section->chromId);
memWriteOne(&pt, section->start);
memWriteOne(&pt, section->end);
memWriteOne(&pt, section->itemStep);
memWriteOne(&pt, section->itemSpan);
memWriteOne(&pt, type);
memWriteOne(&pt, reserved8);
memWriteOne(&pt, section->itemCount);

/* Items, laid out according to section type. */
switch (section->type)
    {
    case bwgTypeBedGraph:
        {
        /* Bed graph items live in a linked list that is walked to its end. */
        struct bwgBedGraphItem *item;
        for (item = section->items.bedGraphList; item != NULL; item = item->next)
            {
            memWriteOne(&pt, item->start);
            memWriteOne(&pt, item->end);
            memWriteOne(&pt, item->val);
            }
        break;
        }
    case bwgTypeVariableStep:
        {
        struct bwgVariableStepPacked *items = section->items.variableStepPacked;
        int i;
        for (i = 0; i < section->itemCount; ++i)
            {
            memWriteOne(&pt, items[i].start);
            memWriteOne(&pt, items[i].val);
            }
        break;
        }
    case bwgTypeFixedStep:
        {
        struct bwgFixedStepPacked *items = section->items.fixedStepPacked;
        int i;
        for (i = 0; i < section->itemCount; ++i)
            memWriteOne(&pt, items[i].val);
        break;
        }
    default:
        internalErr();
        break;
    }

/* What was written must exactly fill the buffer sized above. */
assert(bufSize == (pt - buf) );

if (!doCompress)
    mustWrite(f, buf, bufSize);
else
    {
    size_t maxCompSize = zCompBufSize(bufSize);
    char compBuf[maxCompSize];
    int compSize = zCompress(buf, bufSize, compBuf, maxCompSize);
    mustWrite(f, compBuf, compSize);
    }
return bufSize;
}
static void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as,
        int itemsPerSlot, struct bbiBoundsArray *bounds,
        int sectionCount, boolean doCompress, FILE *f,
        int resTryCount, int resScales[], int resSizes[],
        struct bbExIndexMaker *eim, int bedCount,
        bits16 fieldCount, bits32 *retMaxBlockSize)
/* Read through lf, writing it in f as blocks of up to itemsPerSlot items that never
 * span two chromosomes.  Save the file offset and chromosome range of each block in
 * bounds.
 *   usageList   - chromosomes in the order their lines appear in lf; the function
 *                 steps to the next entry whenever the chromosome name changes.
 *   as          - passed through to loadAndValidateBed.
 *   bounds      - array that gets one entry per block written; needs room for
 *                 sectionCount entries.
 *   sectionCount- number of blocks expected; checked by assert.
 *   doCompress  - if set each block is compressed before it is written.
 *   resTryCount, resScales, resSizes - zoom counting: for each of the resTryCount
 *                 scales in resScales, resSizes is incremented as items are read.
 *   eim         - if non-NULL gets the keys of each row and the offset/size of
 *                 the block the row ended up in.
 *   bedCount    - not used by this function.
 *   fieldCount  - number of fields expected on every line.
 *   retMaxBlockSize - set to the largest uncompressed block size seen.
 * Also refers to tabSep and bedN, which are declared outside this function. */
{
int maxBlockSize = 0;
struct bbiChromUsage *usage = usageList;
char *line, *row[fieldCount+1];
int lastField = fieldCount-1;
int itemIx = 0, sectionIx = 0;
bits64 blockStartOffset = 0;
int startPos = 0, endPos = 0;
bits32 chromId = 0;
struct dyString *stream = dyStringNew(0);

/* Will keep track of some things that help us determine how much to reduce. */
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
boolean atEnd = FALSE, sameChrom = FALSE;
bits32 start = 0, end = 0;
char *chrom = NULL;
struct bed *bed;
AllocVar(bed);

/* Help keep track of which beds are in current chunk so as to write out
 * namedChunks to eim if need be. */
long sectionStartIx = 0, sectionEndIx = 0;

for (;;)
    {
    /* Get next line of input if any. */
    if (lineFileNextReal(lf, &line))
        {
        /* Chop up line and make sure the word count is right. */
        int wordCount;
        if (tabSep)
            wordCount = chopTabs(line, row);
        else
            wordCount = chopLine(line, row);
        lineFileExpectWords(lf, fieldCount, wordCount);

        loadAndValidateBed(row, bedN, fieldCount, lf, bed, as, FALSE);

        chrom = bed->chrom;
        start = bed->chromStart;
        end = bed->chromEnd;

        sameChrom = sameString(chrom, usage->name);
        }
    else  /* No next line */
        {
        atEnd = TRUE;
        }

    /* Check conditions that would end block and save block info and advance to next if need be. */
    if (atEnd || !sameChrom || itemIx >= itemsPerSlot)
        {
        /* Save stream to file, compressing if need be. */
        if (stream->stringSize > maxBlockSize)
            maxBlockSize = stream->stringSize;
        if (doCompress)
            {
            size_t maxCompSize = zCompBufSize(stream->stringSize);

            // keep around an area of scratch memory; it is static so it survives
            // between blocks and only grows when a bigger block comes along
            static int compBufSize = 0;
            static char *compBuf = NULL;
            // check to see if buffer needed for compression is big enough
            if (compBufSize < maxCompSize)
                {
                // free up the old not-big-enough piece
                freez(&compBuf); // freez knows about NULL

                // get new scratch area
                compBufSize = maxCompSize;
                compBuf = needLargeMem(compBufSize);
                }
            int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
            mustWrite(f, compBuf, compSize);
            }
        else
            mustWrite(f, stream->string, stream->stringSize);
        dyStringClear(stream);

        /* Save block offset and size for all named chunks in this section. */
        if (eim != NULL)
            {
            bits64 blockEndOffset = ftell(f);
            bbExIndexMakerAddOffsetSize(eim, blockStartOffset, blockEndOffset-blockStartOffset,
                sectionStartIx, sectionEndIx);
            sectionStartIx = sectionEndIx;
            }

        /* Save info on existing block.  Make sure it fits in bounds first, the
         * same way writeSections does. */
        assert(sectionIx < sectionCount);
        struct bbiBoundsArray *b = &bounds[sectionIx];
        b->offset = blockStartOffset;
        b->range.chromIx = chromId;
        b->range.start = startPos;
        b->range.end = endPos;
        ++sectionIx;
        itemIx = 0;

        if (atEnd)
            break;
        }

    /* Advance to next chromosome if need be and get chromosome id. */
    if (!sameChrom)
        {
        usage = usage->next;
        assert(usage != NULL);
        assert(sameString(chrom, usage->name));
        /* Zoom windows do not carry over from one chromosome to the next. */
        for (resTry = 0; resTry < resTryCount; ++resTry)
            resEnds[resTry] = 0;
        }
    chromId = usage->id;

    /* At start of block we save a lot of info. */
    if (itemIx == 0)
        {
        blockStartOffset = ftell(f);
        startPos = start;
        endPos = end;
        }
    /* Otherwise just update end. */
    else
        {
        if (endPos < end)
            endPos = end;
        /* No need to update startPos since list is sorted. */
        }

    /* Save name into namedOffset if need be. */
    if (eim != NULL)
        {
        bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx);
        sectionEndIx += 1;
        }

    /* Write out data. */
    dyStringWriteOne(stream, chromId);
    dyStringWriteOne(stream, start);
    dyStringWriteOne(stream, end);
    if (fieldCount > 3)
        {
        int i;
        /* Write 3rd through next to last field and a tab separator. */
        for (i=3; i<lastField; ++i)
            {
            char *s = row[i];
            dyStringAppend(stream, s);
            dyStringAppendC(stream, '\t');
            }
        /* Write last field and terminal zero */
        char *s = row[lastField];
        dyStringAppend(stream, s);
        }
    dyStringAppendC(stream, 0);

    itemIx += 1;

    /* Do zoom counting.  resEnds[resTry] is the end of the last window counted at
     * that scale; a new window is counted when the item starts at or past it, and
     * further ones for as long as the item runs beyond the window end. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
        bits32 resEnd = resEnds[resTry];
        if (start >= resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = start + resScales[resTry];
            }
        while (end > resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = resEnd + resScales[resTry];
            }
        }
    }
assert(sectionIx == sectionCount);

/* Release the block buffer; it used to be leaked. */
dyStringFree(&stream);
freez(&bed);
*retMaxBlockSize = maxBlockSize;
}
void writeSections(struct bbiChromUsage *usageList, struct lineFile *lf,
        int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, FILE *f,
        int resTryCount, int resScales[], int resSizes[],
        boolean doCompress, bits32 *retMaxSectionSize)
/* Read through lf, chunking it into sections that get written to f.  Save info
 * about sections in bounds.
 *   usageList   - chromosomes in the order their lines appear in lf.
 *   lf          - sorted bedGraph input (chrom, start, end, value); aborts on
 *                 unsorted or overlapping items.
 *   itemsPerSlot- maximum number of items put in one section.
 *   bounds      - array that gets offset and range of each section written; needs
 *                 room for sectionCount entries.
 *   sectionCount- number of sections expected; checked by assert.
 *   resTryCount, resScales, resSizes - zoom counting: for each of the resTryCount
 *                 scales in resScales, resSizes is incremented as items are read.
 *   doCompress  - if set each section is compressed before it is written.
 *   retMaxSectionSize - set to the largest uncompressed section size seen.
 * A section is only written when it holds at least one item. */
{
int maxSectionSize = 0;
struct bbiChromUsage *usage = usageList;
int itemIx = 0, sectionIx = 0;
bits32 reserved32 = 0;
UBYTE reserved8 = 0;
struct sectionItem items[itemsPerSlot];
struct sectionItem *lastB = NULL;
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
struct dyString *stream = dyStringNew(0);

/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);

for (;;)
    {
    /* Get next line of input if any. */
    char *row[5];
    int rowSize = lineFileChopNext(lf, row, ArraySize(row));

    /* Figure out whether need to output section. */
    boolean sameChrom = FALSE;
    if (rowSize > 0)
        sameChrom = sameString(row[0], usage->name);
    if (itemIx >= itemsPerSlot || rowSize == 0 || !sameChrom)
        {
        /* Only write a section that has something in it.  With no items buffered
         * (empty input, or a first line that is not on the first chromosome)
         * there is no items[itemIx-1] to read and nothing worth writing. */
        if (itemIx > 0)
            {
            /* The section header stores the item count in 16 bits; refuse to
             * write a header that would silently truncate it. */
            if (itemIx > 0xFFFF)
                errAbort("Section of %d items exceeds the 16-bit item count limit line %d of %s; use a smaller itemsPerSlot",
                    itemIx, lf->lineIx, lf->fileName);

            /* Figure out section position. */
            bits32 chromId = usage->id;
            bits32 sectionStart = items[0].start;
            bits32 sectionEnd = items[itemIx-1].end;

            /* Save section info for indexing. */
            assert(sectionIx < sectionCount);
            struct bbiBoundsArray *section = &bounds[sectionIx++];
            section->offset = ftell(f);
            section->range.chromIx = chromId;
            section->range.start = sectionStart;
            section->range.end = sectionEnd;

            /* Output section header to stream. */
            dyStringClear(stream);
            UBYTE type = bwgTypeBedGraph;
            bits16 itemCount = itemIx;
            dyStringWriteOne(stream, chromId);       // chromId
            dyStringWriteOne(stream, sectionStart);  // start
            dyStringWriteOne(stream, sectionEnd);    // end
            dyStringWriteOne(stream, reserved32);    // itemStep
            dyStringWriteOne(stream, reserved32);    // itemSpan
            dyStringWriteOne(stream, type);          // type
            dyStringWriteOne(stream, reserved8);     // reserved
            dyStringWriteOne(stream, itemCount);     // itemCount

            /* Output each item in section to stream. */
            int i;
            for (i=0; i<itemIx; ++i)
                {
                struct sectionItem *item = &items[i];
                dyStringWriteOne(stream, item->start);
                dyStringWriteOne(stream, item->end);
                dyStringWriteOne(stream, item->val);
                }

            /* Save stream to file, compressing if need be. */
            if (stream->stringSize > maxSectionSize)
                maxSectionSize = stream->stringSize;
            if (doCompress)
                {
                size_t maxCompSize = zCompBufSize(stream->stringSize);
                char compBuf[maxCompSize];
                int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
                mustWrite(f, compBuf, compSize);
                }
            else
                mustWrite(f, stream->string, stream->stringSize);
            }

        /* If at end of input we are done. */
        if (rowSize == 0)
            break;

        /* Set up for next section. */
        itemIx = 0;

        if (!sameChrom)
            {
            usage = usage->next;
            assert(usage != NULL);
            if (!sameString(row[0], usage->name))
                errAbort("read %s, expecting %s on line %d in file %s\n",
                    row[0], usage->name, lf->lineIx, lf->fileName);
            /* Sort and overlap checks and zoom windows start over on a new chromosome. */
            lastB = NULL;
            for (resTry = 0; resTry < resTryCount; ++resTry)
                resEnds[resTry] = 0;
            }
        }

    /* Parse out input. */
    lineFileExpectWords(lf, 4, rowSize);
    bits32 start = lineFileNeedNum(lf, row, 1);
    bits32 end = lineFileNeedNum(lf, row, 2);
    float val = lineFileNeedDouble(lf, row, 3);

    /* Verify that inputs meets our assumption - that it is a sorted bedGraph file. */
    if (start > end)
        errAbort("Start (%u) after end (%u) line %d of %s", start, end, lf->lineIx, lf->fileName);
    if (lastB != NULL)
        {
        if (lastB->start > start)
            errAbort("BedGraph not sorted on start line %d of %s", lf->lineIx, lf->fileName);
        if (lastB->end > start)
            errAbort("Overlapping regions in bedGraph line %d of %s", lf->lineIx, lf->fileName);
        }

    /* Do zoom counting.  resEnds[resTry] is the end of the last window counted at
     * that scale; a new window is counted when the item starts at or past it, and
     * further ones for as long as the item runs beyond the window end. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
        bits32 resEnd = resEnds[resTry];
        if (start >= resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = start + resScales[resTry];
            }
        while (end > resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = resEnd + resScales[resTry];
            }
        }

    /* Save values in output array. */
    struct sectionItem *b = &items[itemIx];
    b->start = start;
    b->end = end;
    b->val = val;
    lastB = b;
    itemIx += 1;
    }
assert(sectionIx == sectionCount);

/* Release the section buffer; it used to be leaked. */
dyStringFree(&stream);
*retMaxSectionSize = maxSectionSize;
}