static void bigWigClick(struct trackDb *tdb, char *fileName) /* Display details for BigWig data tracks. */ { char *chrom = cartString(cart, "c"); /* Open BigWig file and get interval list. */ struct bbiFile *bbi = NULL; struct lm *lm = lmInit(0); struct bbiInterval *bbList = NULL; char *maxWinToQuery = trackDbSettingClosestToHome(tdb, "maxWindowToQuery"); unsigned maxWTQ = 0; if (isNotEmpty(maxWinToQuery)) maxWTQ = sqlUnsigned(maxWinToQuery); if ((maxWinToQuery == NULL) || (maxWTQ > winEnd-winStart)) { bbi = bigWigFileOpen(fileName); bbList = bigWigIntervalQuery(bbi, chrom, winStart, winEnd, lm); } char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */ sprintLongWithCommas(num1Buf, BASE_1(winStart)); sprintLongWithCommas(num2Buf, winEnd); printf("<B>Position: </B> %s:%s-%s<BR>\n", chrom, num1Buf, num2Buf ); sprintLongWithCommas(num1Buf, winEnd-winStart); printf("<B>Total Bases in view: </B> %s <BR>\n", num1Buf); if (bbList != NULL) { bbiIntervalStatsReport(bbList, tdb->table, chrom, winStart, winEnd); } else if ((bbi == NULL) && (maxWTQ <= winEnd-winStart)) { sprintLongWithCommas(num1Buf, maxWTQ); printf("<P>Zoom in to a view less than %s bases to see data summary.</P>",num1Buf); } else { printf("<P>No data overlapping current position.</P>"); } lmCleanup(&lm); bbiFileClose(&bbi); }
/* The single externally visible routine. * Future improvements will need to add a couple more arguments to * satisify the needs of the command line version and its options. * Currently, this is used only in customTrack input parsing. */ void wigAsciiToBinary( char *wigAscii, char *wigFile, char *wibFile, double *upperLimit, double *lowerLimit, struct wigEncodeOptions *options) /* given the three file names, read the ascii wigAscii file and produce * the wigFile and wibFile outputs */ { struct lineFile *lf; /* for line file utilities */ char *line = (char *) NULL; /* to receive data input line */ char *words[10]; /* to split data input line */ int wordCount = 0; /* result of split */ int validLines = 0; /* counting only lines with data */ double dataValue = 0.0; /* from data input */ boolean bedData = FALSE; /* in bed format data */ boolean variableStep = FALSE; /* in variableStep data */ boolean fixedStep = FALSE; /* in fixedStep data */ char *prevChromName = (char *)NULL; /* to watch for chrom name changes */ int trackCount = 0; /* We abort if we see more than one track. */ if ((wigAscii == (char *)NULL) || (wigFile == (char *)NULL) || (wibFile == (char *)NULL)) errAbort("wigAsciiToBinary: missing data file names, ascii: %s, wig: %s, wib: %s", wigAscii, wigFile, wibFile); /* need to be careful here and initialize all the global variables */ freez(&wibFileName); /* send this name to the global */ wibFileName = cloneString(wibFile); /* variable for use in output_row() */ lineCount = 0; /* to count all lines */ add_offset = 0; /* to allow "lifting" of the data */ validLines = 0; /* to count only lines with data */ rowCount = 0; /* to count rows output */ bincount = 0; /* to count up to binsize */ binsize = 1024; /* # of data points per table row */ dataSpan = 1; /* default bases spanned per data point */ chromStart = 0; /* for table row data */ previousOffset = 0; /* for data missing detection */ fileOffset = 0; /* current location within binary data file */ fileOffsetBegin = 0;/* location in binary data file where this bin starts*/ freez(&data_values); freez(&validData); data_values = (double *) needMem( (size_t) (binsize * sizeof(double))); validData = (unsigned char *) needMem( (size_t) (binsize * sizeof(unsigned char))); if (options != NULL) { if (options->lift != 0) add_offset = options->lift; if (options->noOverlap) noOverlap = TRUE; if (options->flagOverlapSpanData) flagOverlapSpanData = TRUE; if (options->wibSizeLimit > 0) wibSizeLimit = options->wibSizeLimit; } /* limits for the complete set of data, they must change from these initial defaults during processing */ overallLowerLimit = wigEncodeStartingLowerLimit; overallUpperLimit = wigEncodeStartingUpperLimit; binout = mustOpen(wibFile,"w"); /* binary data file */ wigout = mustOpen(wigFile,"w"); /* table row definition file */ #if defined(DEBUG) /* dbg */ chmod(wibFile, 0666); chmod(wigFile, 0666); #endif lf = lineFileOpen(wigAscii, TRUE); /* input file */ while (lineFileNext(lf, &line, NULL)) { boolean readingFrameSlipped; ++lineCount; if ((wibSizeLimit > 0) && (wibSize >= wibSizeLimit)) errAbort("data size limit of %lld data values has been exceeded. This data can be efficiently displayed with the <A HREF='/goldenPath/help/bigWig.html' TARGET=_blank>bigWig file format</A> in a custom track, or in a <A HREF='/goldenPath/help/hgTrackHubHelp.html' TARGET=_blank>Track Hub</A> for multiple large datasets.", wibSizeLimit); line = skipLeadingSpaces(line); /* ignore blank or comment lines */ if ((line == (char *)NULL) || (line[0] == '\0') || (line[0] == '#')) continue; /* !!! go to next line of input */ wordCount = chopByWhite(line, words, ArraySize(words)); if (sameWord("track",words[0])) { /* Allow (and ignore) one track line, but no more. */ ++trackCount; if (trackCount > 1) errAbort("Multiple tracks seen, second at line %d of %s, can only handle one.", lf->lineIx, lf->fileName); continue; } else if (sameWord("browser", words[0])) { continue; /* ignore browser lines if present */ } else if (sameWord("variableStep",words[0])) { int i; boolean foundChrom = FALSE; /* safest thing to do if we were processing anything is to * output that previous block and start anew * Future improvement could get fancy here and decide if it * is really necessary to start over, although the concept * of a line between data points on one item may use this * block behavior later to define line segments, so don't * get too quick to be fancy here. This line behavior * implies that feature names will need to be specified to * identify the line segments that belong together. */ if (variableStep || bedData || fixedStep) { output_row(); validLines = 0; /* to cause reset for first offset */ } dataSpan = 1; /* default bases spanned per data point */ for(i = 1; i < wordCount; ++i) { if (startsWith("chrom",words[i])) { setChromName(words[i]); foundChrom = TRUE; } else if (startsWith("span",words[i])) setDataSpan(words[i]); else errAbort("illegal specification on variableStep at line %lu: %s", lineCount, words[i]); } if (!foundChrom) errAbort("missing chrom=<name> specification on variableStep declaration at line %lu", lineCount); variableStep = TRUE; bedData = FALSE; fixedStep = FALSE; freez(&prevChromName); prevChromName = cloneString(chromName); continue; /* !!! go to next input line */ } else if (sameWord("fixedStep",words[0])) { boolean foundChrom = FALSE; boolean foundStart = FALSE; int i; /* same comment as above */ if (variableStep || bedData || fixedStep) { output_row(); validLines = 0; /* to cause reset for first offset */ } stepSize = 1; /* default step size */ dataSpan = 0; /* this will match step size if not set*/ for(i = 1; i < wordCount; ++i) { if (startsWith("chrom",words[i])) { setChromName(words[i]); foundChrom = TRUE; } else if (startsWith("start",words[i])) { setFixedStart(words[i]); foundStart = TRUE; } else if (startsWith("step",words[i])) setStepSize(words[i]); else if (startsWith("span",words[i])) setDataSpan(words[i]); else errAbort("illegal specification on variableStep at line %lu: %s", lineCount, words[i]); } if (dataSpan == 0) dataSpan = stepSize; if (!foundChrom) errAbort("missing chrom=<name> specification on fixedStep declaration at line %lu", lineCount); if (!foundStart) errAbort("missing start=<position> specification on fixedStep declaration at line %lu", lineCount); if (noOverlap && validLines && prevChromName) { if (sameWord(prevChromName,chromName) && (fixedStart < chromStart)) errAbort("specified fixedStep chromStart %llu is less than expected next chromStart %llu", fixedStart, chromStart); } variableStep = FALSE; bedData = FALSE; fixedStep = TRUE; freez(&prevChromName); prevChromName = cloneString(chromName); continue; /* !!! go to next input line */ } else if (wordCount == 4) { /* while in bedData, we do not necessarily need to start a new * batch unless the chrom name is changing, since dataSpan * is always 1 for bedData. As above, this may change in * the future if each bed line specification is talking * about a different feature. */ if (variableStep || fixedStep || (bedData && ((prevChromName != (char *)NULL) && differentWord(prevChromName,words[0])))) { output_row(); validLines = 0; /* to cause reset for first offset */ } dataSpan = 1; /* default bases spanned per data point */ variableStep = FALSE; bedData = TRUE; fixedStep = FALSE; freez(&chromName); chromName=cloneString(words[0]); freez(&featureName); featureName=cloneString(words[0]); bedChromStart = sqlLongLong(words[1]); bedChromEnd = sqlLongLong(words[2]); bedDataValue = sqlDouble(words[3]); /* the bed format coordinate system is zero relative, half-open, * hence, no adjustment of bedChromStart is needed here, unlike the * fixed and variable step formats which will subtract one from the * incoming coordinate. */ if (bedChromStart >= bedChromEnd) errAbort("Found chromStart >= chromEnd at line %lu (%llu > %llu)", lineCount, bedChromStart, bedChromEnd); if (bedChromEnd > (bedChromStart + 10000000)) errAbort("Limit of 10,000,000 length specification for bed format at line %lu, found: %llu)", lineCount, bedChromEnd-bedChromStart); if ((validLines > 0) && (bedChromStart < previousOffset)) errAbort("chrom positions not in numerical order at line %lu. previous: %llu > %llu <-current (bed)", lineCount, previousOffset, bedChromStart); freez(&prevChromName); prevChromName = cloneString(chromName); } /* We must be in one of these data formats at this point */ if (!(variableStep || fixedStep || bedData)) errAbort("at the line beginning: %s, variableStep or fixedStep data declaration not found or BED data 4 column format not recognized.", words[0]); if (variableStep && (wordCount != 2)) errAbort("Expecting two words for variableStep data at line %lu, found %d", lineCount, wordCount); if (fixedStep && (wordCount != 1)) errAbort("Expecting one word for fixedStep data at line %lu, found %d", lineCount, wordCount); if (bedData && (wordCount != 4)) errAbort("Expecting four words for bed format data at line %lu, found %d", lineCount, wordCount); ++validLines; /* counting good lines of data input */ /* Offset is the incoming specified position for this value, * fixedStart has already been converted to zero * relative half open */ if (variableStep) { Offset = sqlLongLong(words[0]); Offset = BASE_0(Offset); /* zero relative half open */ dataValue = sqlDouble(words[1]); } else if (fixedStep) { Offset = fixedStart + (stepSize * (validLines - 1)); dataValue = sqlDouble(words[0]); } else if (bedData) { Offset = bedChromStart; dataValue = bedDataValue; } if (dataValue > overallUpperLimit) overallUpperLimit = dataValue; if (dataValue < overallLowerLimit) overallLowerLimit = dataValue; /* see if this is the first time through, establish chromStart */ if (validLines == 1) { chromStart = Offset; verbose(2, "first offset: %llu\n", chromStart); } else if ((validLines > 1) && (Offset <= previousOffset)) errAbort("chrom positions not in numerical order at line %lu. previous: %llu > %llu " "<-current (offset)", lineCount, BASE_1(previousOffset), BASE_1(Offset)); /* if we are working on a zoom level and the data is not exactly * spaced according to the span, then we need to put each value * in its own row in order to keep positioning correct for these * data values. The number of skipped bases has to be an even * multiple of dataSpan */ readingFrameSlipped = FALSE; if ((validLines > 1) && (dataSpan > 1)) { unsigned long long prevEnd = previousOffset + dataSpan; int skippedBases; int spansSkipped; skippedBases = Offset - previousOffset; if (flagOverlapSpanData && (prevEnd > Offset)) errAbort("ERROR: data points overlapping at input line %lu.\n" "previous data point position: %s:%llu-%llu overlaps current: %s:%llu-%llu", lineCount, chromName, BASE_1(previousOffset), prevEnd, chromName, BASE_1(Offset),Offset+dataSpan); spansSkipped = skippedBases / dataSpan; if ((spansSkipped * dataSpan) != skippedBases) readingFrameSlipped = TRUE; } if (readingFrameSlipped) { verbose(2, "data not spanning %llu bases, prev: %llu, this: %llu, at line: %lu\n", dataSpan, previousOffset, Offset, lineCount); output_row(); chromStart = Offset; /* a full reset here */ } /* Check to see if data is being skipped */ else if ( (validLines > 1) && (Offset > (previousOffset + dataSpan)) ) { unsigned long long off; unsigned long long fillSize; /* number of bytes */ verbose(2, "missing data offsets: %llu - %llu\n", BASE_1(previousOffset),BASE_0(Offset)); /* If we are just going to fill the rest of this bin with * no data, then may as well stop here. No need to fill * it with nothing. */ fillSize = (Offset - (previousOffset + dataSpan)) / dataSpan; verbose(2, "filling NO_DATA for %llu bytes\n", fillSize); if (fillSize + bincount >= binsize) { verbose(2, "completing a bin due to NO_DATA for %llu bytes, only %llu - %llu = %llu to go\n", fillSize, binsize, bincount, binsize - bincount); verbose(2, "Offset: %llu, previousOffset: %llu\n", Offset, previousOffset); output_row(); chromStart = Offset; /* a full reset here */ } else { fillSize = 0; /* fill missing data with NO_DATA indication */ for (off = previousOffset + dataSpan; off < Offset; off += dataSpan) { ++fillSize; ++fileOffset; ++bincount; /* count scores in this bin */ if (bincount >= binsize) break; } verbose(2, "filled NO_DATA for %llu bytes\n", fillSize); /* If that finished off this bin, output it * This most likely should not happen here. The * check above: if (fillSize + bincount >= binsize) * should have caught this case already. */ if (bincount >= binsize) { output_row(); chromStart = Offset; /* a full reset here */ } } } /* With perhaps the missing data taken care of, back to the * real data. */ if (bedData) { unsigned long long bedSize = bedChromEnd - bedChromStart; for ( ; bedSize > 0; --bedSize ) { setDataValue(bedDataValue); Offset += 1; } Offset -= 1; /* loop above increments this one too much. * This Offset is supposed to be the last * valid chrom position written, not the * next to be written */ } else { setDataValue(dataValue); } previousOffset = Offset; /* remember position for gap calculations */ } /* reading file input loop end */ /* Done with input file, any data points left in this bin ? */ if (bincount) output_row(); lineFileClose(&lf); fclose(binout); fclose(wigout); freez(&chromName); freez(&featureName); freez(&data_values); freez(&validData); freez(&wibFileName); /* return limits if pointers are given */ if (upperLimit) *upperLimit = overallUpperLimit; if (lowerLimit) *lowerLimit = overallLowerLimit; if (wibSizeLimit > 0) options->wibSizeLimit = wibSize; }