Ejemplo n.º 1
0
static void bigWigClick(struct trackDb *tdb, char *fileName)
/* Display details for BigWig data tracks. */
{
char *chrom = cartString(cart, "c");

/* Open BigWig file and get interval list. */
struct bbiFile *bbi = NULL;
struct lm *lm = lmInit(0);
struct bbiInterval *bbList = NULL;
char *maxWinToQuery = trackDbSettingClosestToHome(tdb, "maxWindowToQuery");

unsigned maxWTQ = 0;
if (isNotEmpty(maxWinToQuery))
    maxWTQ = sqlUnsigned(maxWinToQuery);

if ((maxWinToQuery == NULL) || (maxWTQ > winEnd-winStart))
    {
    bbi = bigWigFileOpen(fileName);
    bbList = bigWigIntervalQuery(bbi, chrom, winStart, winEnd, lm);
    }

char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */
sprintLongWithCommas(num1Buf, BASE_1(winStart));
sprintLongWithCommas(num2Buf, winEnd);
printf("<B>Position: </B> %s:%s-%s<BR>\n", chrom, num1Buf, num2Buf );
sprintLongWithCommas(num1Buf, winEnd-winStart);
printf("<B>Total Bases in view: </B> %s <BR>\n", num1Buf);

if (bbList != NULL)
    {
    bbiIntervalStatsReport(bbList, tdb->table, chrom, winStart, winEnd);
    }
else if ((bbi == NULL) && (maxWTQ <= winEnd-winStart))
    {
    sprintLongWithCommas(num1Buf, maxWTQ);
    printf("<P>Zoom in to a view less than %s bases to see data summary.</P>",num1Buf);
    }
else
    {
    printf("<P>No data overlapping current position.</P>");
    }

lmCleanup(&lm);
bbiFileClose(&bbi);
}
Ejemplo n.º 2
0
/*	The single externally visible routine.
 *	Future improvements will need to add a couple more arguments to
 *	satisfy the needs of the command line version and its options.
 *	Currently, this is used only in customTrack input parsing.
 */
void wigAsciiToBinary( char *wigAscii, char *wigFile, char *wibFile,
   double *upperLimit, double *lowerLimit, struct wigEncodeOptions *options)
/*	given the three file names, read the ascii wigAscii file and produce
 *	the wigFile and wibFile outputs
 *
 *	wigAscii   - input file name, read line by line
 *	wigFile    - output file name for the table row definitions
 *	wibFile    - output file name for the binary data
 *	upperLimit - if not NULL, receives the largest data value seen
 *	lowerLimit - if not NULL, receives the smallest data value seen
 *	options    - may be NULL; when given, its lift, noOverlap,
 *		     flagOverlapSpanData and wibSizeLimit fields are read.
 *		     When a size limit is in effect, options->wibSizeLimit
 *		     is overwritten with wibSize before returning.
 *
 *	Recognized input: at most one "track" line and any number of
 *	"browser" lines (all ignored), "variableStep" and "fixedStep"
 *	declarations followed by their data lines, and four column bed
 *	format lines.  Any violation of the format calls errAbort().
 *
 *	This routine works through file level global variables, which are
 *	initialized below on every call.
 */
{
struct lineFile *lf;			/* for line file utilities	*/
char *line = (char *) NULL;		/* to receive data input line	*/
char *words[10];				/* to split data input line	*/
int wordCount = 0;			/* result of split	*/
int validLines = 0;			/* counting only lines with data */
double dataValue = 0.0;			/* from data input	*/
boolean bedData = FALSE;		/* in bed format data */
boolean variableStep = FALSE;		/* in variableStep data */
boolean fixedStep = FALSE;		/* in fixedStep data */
char *prevChromName = (char *)NULL;	/* to watch for chrom name changes */
int trackCount = 0;			/* We abort if we see more than one track. */

if ((wigAscii == (char *)NULL) || (wigFile == (char *)NULL) ||
    (wibFile == (char *)NULL))
    {
    /* never hand a NULL pointer to a %s conversion */
    errAbort("wigAsciiToBinary: missing data file names, ascii: %s, wig: %s, wib: %s",
        wigAscii ? wigAscii : "(null)", wigFile ? wigFile : "(null)",
        wibFile ? wibFile : "(null)");
    }

/*	need to be careful here and initialize all the global variables */
freez(&wibFileName);			/* send this name to the global */
wibFileName = cloneString(wibFile);	/* variable for use in output_row() */
lineCount = 0;	/* to count all lines	*/
add_offset = 0;	/* to allow "lifting" of the data */
validLines = 0;	/* to count only lines with data */
rowCount = 0;	/* to count rows output */
bincount = 0;	/* to count up to binsize	*/
binsize = 1024;	/* # of data points per table row */
dataSpan = 1;	/* default bases spanned per data point */
chromStart = 0;	/* for table row data */
previousOffset = 0;  /* for data missing detection */
fileOffset = 0;	/* current location within binary data file	*/
fileOffsetBegin = 0;/* location in binary data file where this bin starts*/
freez(&data_values);
freez(&validData);
data_values = (double *) needMem( (size_t) (binsize * sizeof(double)));
validData = (unsigned char *)
            needMem( (size_t) (binsize * sizeof(unsigned char)));

/*	options only ever switch features on or override defaults, values
 *	left over in the globals from elsewhere are not cleared here.
 */
if (options != NULL)
    {
    if (options->lift != 0)
        add_offset = options->lift;
    if (options->noOverlap)
        noOverlap = TRUE;
    if (options->flagOverlapSpanData)
        flagOverlapSpanData = TRUE;
    if (options->wibSizeLimit > 0)
        wibSizeLimit = options->wibSizeLimit;
    }

/* limits for the complete set of data, they must change from these initial
        defaults during processing */
overallLowerLimit = wigEncodeStartingLowerLimit;
overallUpperLimit = wigEncodeStartingUpperLimit;
binout = mustOpen(wibFile,"w");	/*	binary data file	*/
wigout = mustOpen(wigFile,"w");	/*	table row definition file */
#if defined(DEBUG)	/*	dbg	*/
chmod(wibFile, 0666);
chmod(wigFile, 0666);
#endif
lf = lineFileOpen(wigAscii, TRUE);	/*	input file	*/
while (lineFileNext(lf, &line, NULL))
    {
    boolean readingFrameSlipped;

    ++lineCount;
    if ((wibSizeLimit > 0) && (wibSize >= wibSizeLimit))
        errAbort("data size limit of %lld data values has been exceeded.  This data can be efficiently displayed with the <A HREF='/goldenPath/help/bigWig.html' TARGET=_blank>bigWig file format</A> in a custom track, or in a <A HREF='/goldenPath/help/hgTrackHubHelp.html' TARGET=_blank>Track Hub</A> for multiple large datasets.", wibSizeLimit);
    line = skipLeadingSpaces(line);
    /*	ignore blank or comment lines	*/
    if ((line == (char *)NULL) || (line[0] == '\0') || (line[0] == '#'))
        continue;		/*	!!! go to next line of input */

    wordCount = chopByWhite(line, words, ArraySize(words));

    if (sameWord("track",words[0]))
        {
        /* Allow (and ignore) one track line, but no more. */
        ++trackCount;
        if (trackCount > 1)
            errAbort("Multiple tracks seen, second at line %d of %s, can only handle one.",
                lf->lineIx, lf->fileName);
        continue;
        }
    else if (sameWord("browser", words[0]))
        {
        continue;	/* ignore browser lines if present */
        }
    else if (sameWord("variableStep",words[0]))
        {
        int i;
        boolean foundChrom = FALSE;
        /*	safest thing to do if we were processing anything is to
         *	output that previous block and start anew
         *	Future improvement could get fancy here and decide if it
         *	is really necessary to start over, although the concept
         *	of a line between data points on one item may use this
         *	block behavior later to define line segments, so don't
         *	get too quick to be fancy here.  This line behavior
         *	implies that feature names will need to be specified to
         *	identify the line segments that belong together.
         */
        if (variableStep || bedData || fixedStep)
            {
            output_row();
            validLines = 0;	/*	to cause reset for first offset	*/
            }
        dataSpan = 1;	/* default bases spanned per data point */
        for(i = 1; i < wordCount; ++i)
            {
            if (startsWith("chrom",words[i]))
                {
                setChromName(words[i]);
                foundChrom = TRUE;
                }
            else if (startsWith("span",words[i]))
                setDataSpan(words[i]);
            else
                errAbort("illegal specification on variableStep at line %lu: %s",
                    lineCount, words[i]);
            }
        if (!foundChrom)
            errAbort("missing chrom=<name> specification on variableStep declaration at line %lu", lineCount);
        variableStep = TRUE;
        bedData = FALSE;
        fixedStep = FALSE;
        freez(&prevChromName);
        prevChromName = cloneString(chromName);
        continue;		/*	!!!  go to next input line	*/
        }
    else if (sameWord("fixedStep",words[0]))
        {
        boolean foundChrom = FALSE;
        boolean foundStart = FALSE;
        int i;

        /*	as for variableStep: finish any block in progress before
         *	starting a new one
         */
        if (variableStep || bedData || fixedStep)
            {
            output_row();
            validLines = 0;	/*	to cause reset for first offset	*/
            }
        stepSize = 1;	/*	default step size	*/
        dataSpan = 0;	/*      this will match step size if not set*/
        for(i = 1; i < wordCount; ++i)
            {
            if (startsWith("chrom",words[i]))
                {
                setChromName(words[i]);
                foundChrom = TRUE;
                }
            else if (startsWith("start",words[i]))
                {
                setFixedStart(words[i]);
                foundStart = TRUE;
                }
            else if (startsWith("step",words[i]))
                setStepSize(words[i]);
            else if (startsWith("span",words[i]))
                setDataSpan(words[i]);
            else
                errAbort("illegal specification on fixedStep at line %lu: %s",
                    lineCount, words[i]);
            }
        if (dataSpan == 0)
            dataSpan = stepSize;
        if (!foundChrom)
            errAbort("missing chrom=<name> specification on fixedStep declaration at line %lu", lineCount);
        if (!foundStart)
            errAbort("missing start=<position> specification on fixedStep declaration at line %lu", lineCount);
        if (noOverlap && validLines && prevChromName)
            {
            if (sameWord(prevChromName,chromName) && (fixedStart < chromStart))
                errAbort("specified fixedStep chromStart %llu is less than expected next chromStart %llu", fixedStart, chromStart);
            }
        variableStep = FALSE;
        bedData = FALSE;
        fixedStep = TRUE;
        freez(&prevChromName);
        prevChromName = cloneString(chromName);
        continue;		/*	!!!  go to next input line	*/
        }
    else if (wordCount == 4)
        {
        /*	while in bedData, we do not necessarily need to start a new
         *	batch unless the chrom name is changing, since dataSpan
         *	is always 1 for bedData.  As above, this may change in
         *	the future if each bed line specification is talking
         *	about a different feature.
         */
        if (variableStep || fixedStep ||
                (bedData && ((prevChromName != (char *)NULL) &&
                        differentWord(prevChromName,words[0]))))
            {
            output_row();
            validLines = 0;	/*	to cause reset for first offset	*/
            }
        dataSpan = 1;	/* default bases spanned per data point */
        variableStep = FALSE;
        bedData = TRUE;
        fixedStep = FALSE;
        freez(&chromName);
        chromName=cloneString(words[0]);
        freez(&featureName);
        featureName=cloneString(words[0]);
        bedChromStart = sqlLongLong(words[1]);
        bedChromEnd = sqlLongLong(words[2]);
        bedDataValue = sqlDouble(words[3]);
        /* the bed format coordinate system is zero relative, half-open,
         * hence, no adjustment of bedChromStart is needed here, unlike the
         * fixed and variable step formats which will subtract one from the
         * incoming coordinate.
         */
        if (bedChromStart >= bedChromEnd)
            errAbort("Found chromStart >= chromEnd at line %lu (%llu >= %llu)",
                lineCount, bedChromStart, bedChromEnd);
        if (bedChromEnd > (bedChromStart + 10000000))
            errAbort("Limit of 10,000,000 length specification for bed format at line %lu, found: %llu)",
                lineCount, bedChromEnd-bedChromStart);
        if ((validLines > 0) && (bedChromStart < previousOffset))
            errAbort("chrom positions not in numerical order at line %lu. previous: %llu > %llu <-current (bed)", lineCount, previousOffset, bedChromStart);
        freez(&prevChromName);
        prevChromName = cloneString(chromName);
        }

    /*	We must be in one of these data formats at this point */
    if (!(variableStep || fixedStep || bedData))
        errAbort("at the line beginning: %s, variableStep or fixedStep data declaration not found or BED data 4 column format not recognized.", words[0]);
    if (variableStep && (wordCount != 2))
        errAbort("Expecting two words for variableStep data at line %lu, found %d",
            lineCount, wordCount);
    if (fixedStep && (wordCount != 1))
        errAbort("Expecting one word for fixedStep data at line %lu, found %d",
            lineCount, wordCount);
    if (bedData && (wordCount != 4))
        errAbort("Expecting four words for bed format data at line %lu, found %d",
            lineCount, wordCount);

    ++validLines;		/*	counting good lines of data input */

    /*	Offset is the incoming specified position for this value,
     *	fixedStart has already been converted to zero
     *	relative half open
     */
    if (variableStep)
        {
        Offset = sqlLongLong(words[0]);
        Offset = BASE_0(Offset);	/* zero relative half open */
        dataValue = sqlDouble(words[1]);
        }
    else if (fixedStep)
        {
        Offset = fixedStart + (stepSize * (validLines - 1));
        dataValue = sqlDouble(words[0]);
        }
    else if (bedData)
        {
        Offset = bedChromStart;
        dataValue = bedDataValue;
        }
    if (dataValue > overallUpperLimit) overallUpperLimit = dataValue;
    if (dataValue < overallLowerLimit) overallLowerLimit = dataValue;

    /* see if this is the first time through, establish chromStart 	*/
    if (validLines == 1)
        {
        chromStart = Offset;
        verbose(2, "first offset: %llu\n", chromStart);
        }
    else if ((validLines > 1) && (Offset <= previousOffset))
        errAbort("chrom positions not in numerical order at line %lu. previous: %llu > %llu "
                 "<-current (offset)", lineCount, BASE_1(previousOffset), BASE_1(Offset));

    /* if we are working on a zoom level and the data is not exactly
     * spaced according to the span, then we need to put each value
     * in its own row in order to keep positioning correct for these
     * data values.  The number of skipped bases has to be an even
     * multiple of dataSpan
     */
    readingFrameSlipped = FALSE;
    if ((validLines > 1) && (dataSpan > 1))
        {
        unsigned long long prevEnd = previousOffset + dataSpan;
        int skippedBases;
        int spansSkipped;
        skippedBases = Offset - previousOffset;
        if (flagOverlapSpanData && (prevEnd > Offset))
            errAbort("ERROR: data points overlapping at input line %lu.\n"
                "previous data point position: %s:%llu-%llu overlaps current: %s:%llu-%llu",
                lineCount, chromName, BASE_1(previousOffset), prevEnd,
                chromName, BASE_1(Offset),Offset+dataSpan);
        spansSkipped = skippedBases / dataSpan;
        if ((spansSkipped * dataSpan) != skippedBases)
            readingFrameSlipped = TRUE;
        }

    if (readingFrameSlipped)
        {
        verbose(2, "data not spanning %llu bases, prev: %llu, this: %llu, at line: %lu\n", dataSpan, previousOffset, Offset, lineCount);
        output_row();
        chromStart = Offset;	/*	a full reset here	*/
        }
    /*	Check to see if data is being skipped	*/
    else if ( (validLines > 1) && (Offset > (previousOffset + dataSpan)) )
        {
        unsigned long long off;
        unsigned long long fillSize;	/* number of bytes */

        verbose(2, "missing data offsets: %llu - %llu\n",
                BASE_1(previousOffset),BASE_0(Offset));
        /*	If we are just going to fill the rest of this bin with
         *  no data, then may as well stop here.  No need to fill
         *  it with nothing.
         */
        fillSize = (Offset - (previousOffset + dataSpan)) / dataSpan;
        verbose(2, "filling NO_DATA for %llu bytes\n", fillSize);
        if (fillSize + bincount >= binsize)
            {
            verbose(2, "completing a bin due to  NO_DATA for %llu bytes, only %llu - %llu = %llu to go\n", fillSize, binsize, bincount, binsize - bincount);
            verbose(2, "Offset: %llu, previousOffset: %llu\n",
                    Offset, previousOffset);
            output_row();
            chromStart = Offset;	/*	a full reset here	*/
            }
        else
            {
            fillSize = 0;
            /*	fill missing data with NO_DATA indication	*/
            for (off = previousOffset + dataSpan; off < Offset;
                    off += dataSpan)
                {
                ++fillSize;
                ++fileOffset;
                ++bincount;	/*	count scores in this bin */
                if (bincount >= binsize) break;
                }
            verbose(2, "filled NO_DATA for %llu bytes\n", fillSize);
            /*	If that finished off this bin, output it
             *	This most likely should not happen here.  The
             *	check above: if (fillSize + bincount >= binsize)
             *	should have caught this case already.
             */
            if (bincount >= binsize)
                {
                output_row();
                chromStart = Offset;	/* a full reset here */
                }
            }
        }

    /*	With perhaps the missing data taken care of, back to the
     *	real data.
     */
    if (bedData)
        {
        unsigned long long bedSize = bedChromEnd - bedChromStart;
        for ( ; bedSize > 0; --bedSize )
            {
            setDataValue(bedDataValue);
            Offset += 1;
            }
        Offset -= 1;	/*	loop above increments this one too much.
                         *	This Offset is supposed to be the last
                         *	valid chrom position written, not the
                         *	next to be written */
        }
    else
        {
        setDataValue(dataValue);
        }
    previousOffset = Offset;	/* remember position for gap calculations */
    }	/*	reading file input loop end	*/

/*	Done with input file, any data points left in this bin ?	*/
if (bincount)
    output_row();

lineFileClose(&lf);
fclose(binout);
fclose(wigout);
freez(&chromName);
freez(&featureName);
freez(&prevChromName);	/* local clone of the last chrom name seen */
freez(&data_values);
freez(&validData);
freez(&wibFileName);
/*	return limits if pointers are given	*/
if (upperLimit)
    *upperLimit = overallUpperLimit;
if (lowerLimit)
    *lowerLimit = overallLowerLimit;
/*	options is optional, and wibSizeLimit is a global which may be
 *	positive even when this call was made without options
 */
if ((options != NULL) && (wibSizeLimit > 0))
    options->wibSizeLimit = wibSize;
}