Ejemplo n.º 1
0
static void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as, 
	int itemsPerSlot, struct bbiBoundsArray *bounds, 
	int sectionCount, boolean doCompress, FILE *f, 
	int resTryCount, int resScales[], int resSizes[], 
	struct bbExIndexMaker *eim,  int bedCount,
	bits16 fieldCount, bits32 *retMaxBlockSize)
/* Read through lf, writing it in f.  Save starting points of blocks (every itemsPerSlot)
 * to boundsArray */
{
int maxBlockSize = 0;
struct bbiChromUsage *usage = usageList;
char *line, *row[fieldCount+1];
int lastField = fieldCount-1;
int itemIx = 0, sectionIx = 0;
bits64 blockStartOffset = 0;
int startPos = 0, endPos = 0;
bits32 chromId = 0;
struct dyString *stream = dyStringNew(0);

/* Will keep track of some things that help us determine how much to reduce. */
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
boolean atEnd = FALSE, sameChrom = FALSE;
bits32 start = 0, end = 0;
char *chrom = NULL;
struct bed *bed;
AllocVar(bed);

/* Help keep track of which beds are in current chunk so as to write out
 * namedChunks to eim if need be. */
long sectionStartIx = 0, sectionEndIx = 0;

for (;;)
    {
    /* Get next line of input if any. */
    if (lineFileNextReal(lf, &line))
	{
	/* Chop up line and make sure the word count is right. */
	int wordCount;
	if (tabSep)
	    wordCount = chopTabs(line, row);
	else
	    wordCount = chopLine(line, row);
	lineFileExpectWords(lf, fieldCount, wordCount);

	loadAndValidateBed(row, bedN, fieldCount, lf, bed, as, FALSE);

	chrom = bed->chrom;
	start = bed->chromStart;
	end = bed->chromEnd;

	sameChrom = sameString(chrom, usage->name);
	}
    else  /* No next line */
	{
	atEnd = TRUE;
	}


    /* Check conditions that would end block and save block info and advance to next if need be. */
    if (atEnd || !sameChrom || itemIx >= itemsPerSlot)
        {
	/* Save stream to file, compressing if need be. */
	if (stream->stringSize > maxBlockSize)
	    maxBlockSize = stream->stringSize;
	if (doCompress)
            {
	    size_t maxCompSize = zCompBufSize(stream->stringSize);

            // keep around an area of scratch memory
            static int compBufSize = 0;
            static char *compBuf = NULL;
            // check to see if buffer needed for compression is big enough
            if (compBufSize < maxCompSize)
                {
                // free up the old not-big-enough piece
                freez(&compBuf); // freez knows bout NULL

                // get new scratch area
                compBufSize = maxCompSize;
                compBuf = needLargeMem(compBufSize);
                }

	    int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
	    mustWrite(f, compBuf, compSize);
	    }
	else
	    mustWrite(f, stream->string, stream->stringSize);
	dyStringClear(stream);

	/* Save block offset and size for all named chunks in this section. */
	if (eim != NULL)
	    {
	    bits64 blockEndOffset = ftell(f);
	    bbExIndexMakerAddOffsetSize(eim, blockStartOffset, blockEndOffset-blockStartOffset,
		sectionStartIx, sectionEndIx);
	    sectionStartIx = sectionEndIx;
	    }

	/* Save info on existing block. */
	struct bbiBoundsArray *b = &bounds[sectionIx];
	b->offset = blockStartOffset;
	b->range.chromIx = chromId;
	b->range.start = startPos;
	b->range.end = endPos;
	++sectionIx;
	itemIx = 0;

	if (atEnd)
	    break;
	}

    /* Advance to next chromosome if need be and get chromosome id. */
    if (!sameChrom)
        {
	usage = usage->next;
	assert(usage != NULL);
	assert(sameString(chrom, usage->name));
	for (resTry = 0; resTry < resTryCount; ++resTry)
	    resEnds[resTry] = 0;
	}
    chromId = usage->id;

    /* At start of block we save a lot of info. */
    if (itemIx == 0)
        {
	blockStartOffset = ftell(f);
	startPos = start;
	endPos = end;
	}
    /* Otherwise just update end. */
        {
	if (endPos < end)
	    endPos = end;
	/* No need to update startPos since list is sorted. */
	}

    /* Save name into namedOffset if need be. */
    if (eim != NULL)
	{
	bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx);
	sectionEndIx += 1;
	}

    /* Write out data. */
    dyStringWriteOne(stream, chromId);
    dyStringWriteOne(stream, start);
    dyStringWriteOne(stream, end);
    if (fieldCount > 3)
        {
	int i;
	/* Write 3rd through next to last field and a tab separator. */
	for (i=3; i<lastField; ++i)
	    {
	    char *s = row[i];
	    dyStringAppend(stream, s);
	    dyStringAppendC(stream, '\t');
	    }
	/* Write last field and terminal zero */
	char *s = row[lastField];
	dyStringAppend(stream, s);
	}
    dyStringAppendC(stream, 0);

    itemIx += 1;

    /* Do zoom counting. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
	bits32 resEnd = resEnds[resTry];
	if (start >= resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = start + resScales[resTry];
	    }
	while (end > resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = resEnd + resScales[resTry];
	    }
	}
    }
assert(sectionIx == sectionCount);
freez(&bed);
*retMaxBlockSize = maxBlockSize;
}
Ejemplo n.º 2
0
void loadOneBed(struct lineFile *lf, int bedSize, struct bedStub **pList)
/* Load one bed file.  Make sure all lines have the correct number of fields.
 * Put results in *pList. */
{
char *words[64], *line, *dupe;
int wordCount;
struct bedStub *bed;
struct asObject *asObj = getAsObj(bedSize);
int fieldCount = getFieldCount(bedSize, asObj);
struct bed *validateBed;
AllocVar(validateBed);

verbose(1, "Reading %s\n", lf->fileName);
while (lineFileNextReal(lf, &line))
    {
    if (hasBin)
	nextWord(&line);
    dupe = cloneString(line);
    if (strictTab)
	wordCount = chopTabs(line, words);
    else
	wordCount = chopLine(line, words);
    /* ignore empty lines	*/
    if (0 == wordCount)
	continue;
    lineFileExpectWords(lf, fieldCount, wordCount);

    if (type)  
        // TODO also, may need to add a flag to the validateBed() interface to support -allowNegativeScores when not isCt
        //  although can probably get away without it since usually -allowNegativeScores is used by ct which has already verified it.
        //  thus -allowNegativeScores is unlikely to be used with -type.
	{
	loadAndValidateBed(words, typeBedN, fieldCount, lf, validateBed, asObj, FALSE);
	checkChromNameAndSize(lf, validateBed->chrom, validateBed->chromEnd);
	}

    AllocVar(bed);
    bed->chrom = cloneString(words[0]);
    bed->chromStart = lineFileNeedNum(lf, words, 1);
    bed->chromEnd = lineFileNeedNum(lf, words, 2);
    if (! noStrict)
	{
	if ((bed->chromEnd < 1) && !allowStartEqualEnd)
	    errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n",
		     lf->lineIx, dupe);
	if (bed->chromStart == bed->chromEnd && !allowStartEqualEnd)
	    errAbort("ERROR: line %d:'%s'\nchromStart == chromEnd (%d) (zero-length item)\n"
		     "Use -allowStartEqualEnd if that is legit (e.g. for insertion point).\n",
		     lf->lineIx, dupe, bed->chromStart);
	if (bed->chromStart > bed->chromEnd)
	    errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n",
		     lf->lineIx, dupe, bed->chromStart, bed->chromEnd);
	}
    bed->line = dupe;
    slAddHead(pList, bed);
    }

if (asObj)
    asObjectFreeList(&asObj);
freez(&validateBed);
}