static void bigWigLoadItems(struct track *tg)
/* Make sure tg->bbiFile refers to the open bigWig file(s) for this track.
 * When the trackDb has an "extTable" setting, the track's own table and the
 * extra table are both opened and pushed onto the head of the tg->bbiFile
 * list.  Otherwise a single file is opened, and only if none is attached yet. */
{
	char *extraTable = trackDbSetting(tg->tdb, "extTable");

	if (extraTable == NULL)
	{
		/* Single file case: keep whatever is already attached. */
		if (tg->bbiFile != NULL)
			return;

		struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
		char *path = bbiNameFromTable(conn, tg->table);
		tg->bbiFile = bigWigFileOpen(path);
		hFreeConn(&conn);
		return;
	}

	/* Two file case: primary table first, then the extra table.  Each is
	 * added at the head, so the extra table's file ends up first in the list. */
	struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
	char *tables[2];
	int ix;
	tables[0] = tg->table;
	tables[1] = extraTable;
	for (ix = 0; ix < 2; ++ix)
	{
		char *path = bbiNameFromTable(conn, tables[ix]);
		struct bbiFile *opened = bigWigFileOpen(path);
		slAddHead(&tg->bbiFile, opened);
	}
	hFreeConn(&conn);
}
Ejemplo n.º 2
0
void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Do correlation analysis between elder and younger and save result to
 * a new edwQaPairCorrelation record. Do this for a format where we have a bigWig file.
 * Nothing is done when a record for the pair already exists, or when
 * elderVf->enrichedIn is empty or "unknown". */
{
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    return;
char *enrichedIn = elderVf->enrichedIn;
if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown"))
    {
    struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn);

    /* Get open big wig files for both younger and older. */
    char *elderPath = edwPathForFileId(conn, elderEf->id);
    char *youngerPath = edwPathForFileId(conn, youngerEf->id);
    struct bbiFile *elderBbi = bigWigFileOpen(elderPath);
    struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath);

    /* Figure out thresholds */
    double elderThreshold = twoStdsOverMean(elderBbi);
    double youngerThreshold = twoStdsOverMean(youngerBbi);

    /* Loop through a chromosome at a time adding to the three correlations.
     * The chromosome list comes from the elder file only. */
    struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew();
    struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi);
    struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew();
    struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew();
    for (chrom = chromList; chrom != NULL; chrom = chrom->next)
        {
        addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi,
            elderThreshold, youngerThreshold, c, cInEnriched, cClipped);
        }

    /* Make up correlation structure and save it. */
    struct edwQaPairCorrelation *cor;
    AllocVar(cor);
    cor->elderFileId = elderVf->fileId;
    cor->youngerFileId = youngerVf->fileId;
    cor->pearsonOverall = correlateResult(c);
    cor->pearsonInEnriched = correlateResult(cInEnriched);
    cor->pearsonClipped = correlateResult(cClipped);
    edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128);

    /* Clean up.  All three correlators and the chromosome list are released;
     * previously only c was freed and the rest leaked on every call. */
    bigWigValsOnChromFree(&bVals);
    bigWigValsOnChromFree(&aVals);
    bbiChromInfoFreeList(&chromList);
    genomeRangeTreeFree(&targetGrt);
    freez(&cor);
    correlateFree(&cClipped);
    correlateFree(&cInEnriched);
    correlateFree(&c);
    bigWigFileClose(&youngerBbi);
    bigWigFileClose(&elderBbi);
    freez(&youngerPath);
    freez(&elderPath);
    }
}
Ejemplo n.º 3
0
void bigWigFillDataVector(char *table, struct region *region,
	struct sqlConnection *conn, struct dataVector *vector)
/* Load per-base values from the bigWig behind table into vector, restricted to
 * region and to the intervals that survive the current filter and intersection.
 * At most vector->maxCount entries are written; vector->count is set to the
 * number actually written. */
{
/* Current filter settings (looked up for curTable). */
enum wigCompare cmp;
double ll, ul;
getWigFilter(database, curTable, &cmp, &ll, &ul);

/* Intervals are allocated out of a local memory pool that is discarded on exit. */
struct lm *lm = lmInit(0);
char *fileName = bigWigFileName(table, conn);
struct bbiFile *bwf = bigWigFileOpen(fileName);
struct bbiInterval *ivList =
    intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);

/* Expand each interval, clipped to the region, into one entry per base. */
int filled = 0;
struct bbiInterval *iv;
for (iv = ivList; iv != NULL; iv = iv->next)
    {
    int pos = max(iv->start, region->start);
    int stop = min(iv->end, region->end);
    while (pos < stop && filled < vector->maxCount)
        {
        vector->value[filled] = iv->val;
        vector->position[filled] = pos;
        ++filled;
        ++pos;
        }
    }
vector->count = filled;

bbiFileClose(&bwf);
freeMem(fileName);
lmCleanup(&lm);
}
Ejemplo n.º 4
0
void bigWigAverageOverBed(char *inBw, char *inBed, char *outTab)
/* bigWigAverageOverBed - Compute average score of big wig over each bed, which may have introns.
 * Results go to outTab, and additionally to the file named by the bedOut
 * setting when that is not NULL. */
{
struct bed *bedList;
int fieldCount;
bedLoadAllReturnFieldCount(inBed, &bedList, &fieldCount);
checkUniqueNames(bedList);

struct bbiFile *bbi = bigWigFileOpen(inBw);
FILE *f = mustOpen(outTab, "w");
FILE *bedF = NULL;
if (bedOut != NULL)
    bedF = mustOpen(bedOut, "w");

/* Count up number of blocks in file.  It takes about 1/100th of a second to
 * look up a single block in a bigWig.  On the other hand to stream through
 * the whole file setting an array of doubles takes about 30 seconds, so we change
 * strategy at 3,000 blocks.
 *   I (Jim) usually avoid having two paths through the code like this, and am tempted
 * to always go the ~30 second chromosome-at-a-time way.  On the other hand the block-way
 * was developed first, and it was useful to have both ways to test against each other.
 * (This found a bug where the chromosome way wasn't handling beds in chromosomes not
 * covered by the bigWig for instance).  Since this code is not likely to change too
 * much, keeping both implementations in seems reasonable. */
int blockCount = countBlocks(bedList, fieldCount);
verbose(2, "Got %d blocks, if >= 3000 will use chromosome-at-a-time method\n", blockCount);

if (blockCount < 3000)
    averageFetchingEachBlock(bbi, bedList, fieldCount, f, bedF);
else
    averageFetchingEachChrom(bbi, &bedList, fieldCount, f, bedF);

carefulClose(&bedF);
carefulClose(&f);
/* The bigWig handle was previously left open; release it with the outputs. */
bbiFileClose(&bbi);
}
Ejemplo n.º 5
0
/* --- .Call ENTRY POINT --- */
/* Summarize a bigWig file over a set of ranges.  Returns a list with one
 * numeric vector per element of r_chrom; element i has r_size[i] entries,
 * pre-filled with r_default_value and then passed to bigWigSummaryArray for
 * the i-th range.  A warning is raised for each range that cannot be
 * summarized. */
SEXP BWGFile_summary(SEXP r_filename, SEXP r_chrom, SEXP r_ranges,
                     SEXP r_size, SEXP r_type, SEXP r_default_value)
{
  pushRHandlers();
  struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  enum bbiSummaryType summary_type =
    bbiSummaryTypeFromString((char *)CHAR(asChar(r_type)));
  double fill = asReal(r_default_value);
  int *range_start = INTEGER(get_IRanges_start(r_ranges));
  int *range_width = INTEGER(get_IRanges_width(r_ranges));
  int n_chrom = length(r_chrom);
  SEXP ans;

  PROTECT(ans = allocVector(VECSXP, n_chrom));
  for (int i = 0; i < n_chrom; i++) {
    int n_bins = INTEGER(r_size)[i];
    char *chrom = (char *)CHAR(STRING_ELT(r_chrom, i));
    /* IRanges starts are 1-based; the bigWig query takes a 0-based start. */
    int from = range_start[i] - 1;
    int to = range_start[i] - 1 + range_width[i];

    SEXP r_values = allocVector(REALSXP, n_bins);
    double *values = REAL(r_values);
    for (int j = 0; j < n_bins; j++)
      values[j] = fill;
    /* Attach to the protected result before the summary call runs. */
    SET_VECTOR_ELT(ans, i, r_values);

    bool success = bigWigSummaryArray(file, chrom, from, to, summary_type,
                                      n_bins, values);
    if (!success)
      warning("Failed to summarize range %d (%s:%d-%d)", i, chrom,
              range_start[i], range_start[i] - 1 + range_width[i]);
  }
  bbiFileClose(&file);
  popRHandlers();
  UNPROTECT(1);
  return ans;
}
Ejemplo n.º 6
0
void checkInputOpenFiles(struct inInfo *array, int count)
/* Make sure all of the input is there and of right format before going forward. Since
 * this is going to take a while we want to fail fast.  Each input is opened and the
 * handle is left in in->bbi (bigWig) or in->lf (bed); for beds the first line is
 * parsed as a sanity check and then pushed back with lineFileReuse. */
{
int i;
for (i=0; i<count; ++i)
    {
    struct inInfo *in = &array[i];
    switch (in->type)
        {
        case itBigWig:
            {
            /* Opening is the whole check; per the original note the open call
             * aborts on any problem.  The handle is kept for later use. */
            in->bbi = bigWigFileOpen(in->fileName);
            break;
            }
        case itPromoterBed:
        case itUnstrandedBed:
        case itBlockedBed:
            {
            struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE);
            char *line;
            lineFileNeedNext(lf, &line, NULL);
            /* Chop a copy so the line handed back by lineFileReuse stays intact. */
            char *dupe = cloneString(line);
            char *row[256];
            int wordCount = chopLine(dupe, row);
            struct bed *bed = NULL;
            switch (in->type)
                {
                case itPromoterBed:
                    {
                    lineFileExpectAtLeast(lf, 6, wordCount);
                    bed = bedLoadN(row, 6);
                    char strand = bed->strand[0];
                    /* Strand is the sixth field, row[5].  The message used to print
                     * row[6], which is unset when the line has exactly six fields. */
                    if (strand != '+' && strand != '-')
                        errAbort("%s must be stranded, got %s in that field", lf->fileName, row[5]);
                    break;
                    }
                case itUnstrandedBed:
                    lineFileExpectAtLeast(lf, 4, wordCount);
                    bed = bedLoadN(row, 4);
                    break;
                case itBlockedBed:
                    /* Twelve fields are loaded, so require twelve (was 4, which let
                     * bedLoadN read past the parsed words on short lines). */
                    lineFileExpectAtLeast(lf, 12, wordCount);
                    bed = bedLoadN(row, 12);
                    break;
                default:
                    internalErr();
                    break;
                }
            bedFree(&bed);
            freez(&dupe);
            lineFileReuse(lf);
            break;
            }
        default:
            internalErr();
            break;
        }
    }
}
Ejemplo n.º 7
0
void printBiggestGap(char *database, struct sqlConnection *conn, 
	struct slName *chromList, struct hash *chromHash, char *track)
/* Find the largest gap in coverage of track over the chromosomes in chromList
 * and print one tab-separated line: track, chrom:start-end, gap size.
 * bigBed and bigWig tracks are read through their bbi file, other types
 * through the database table. */
{
struct trackDb *tdb = hTrackInfo(conn, track);
struct hTableInfo *hti = hFindTableInfo(database, chromList->name, tdb->table);
char *typeWord = cloneFirstWord(tdb->type);

/* Work out whether this is one of the big* types and open the file if so. */
boolean isBigBed = sameString(typeWord, "bigBed");
boolean isBigWig = !isBigBed && sameString(typeWord, "bigWig");
boolean isBig = (isBigBed || isBigWig);
struct bbiFile *bbi = NULL;
if (isBigBed)
    bbi = bigBedFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) );
else if (isBigWig)
    bbi = bigWigFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) );

/* Best gap seen so far. */
char *bestChrom = NULL;
int bestSize = 0, bestStart = 0, bestEnd = 0;

struct slName *chrom;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    /* Skip chroms with '_' in the name unless allParts, and chrY if female. */
    if ((!allParts && strchr(chrom->name, '_'))
        || (female && sameString(chrom->name, "chrY")))
        continue;

    int chromSize = hashIntVal(chromHash, chrom->name);
    struct rbTree *coverage = rangeTreeNew();
    if (isBig)
        bigCoverageIntoTree(tdb, bbi, chrom->name, chromSize, coverage, isBigBed);
    else
        tableCoverageIntoTree(hti, tdb, conn, chrom->name, chromSize, coverage);

    /* Want to keep completely uncovered chromosome uncovered */
    if (coverage->n > 0)
        addGaps(conn, chrom->name, coverage);

    int gapStart = 0, gapEnd = 0, gapSize = 0;
    biggestGapFromRangeTree(coverage, chromSize, &gapStart, &gapEnd, &gapSize);
    if (gapSize > bestSize)
        {
        bestChrom = chrom->name;
        bestStart = gapStart;
        bestEnd = gapEnd;
        bestSize = gapSize;
        }
    rangeTreeFree(&coverage);
    }

/* Start is printed one-based. */
printf("%s\t%s:%d-%d\t", track, bestChrom, bestStart+1, bestEnd);
if (noComma)
    printf("%d", bestSize);
else
    printLongWithCommas(stdout, bestSize);
putchar('\n');

freez(&typeWord);
bbiFileClose(&bbi);
}
Ejemplo n.º 8
0
void bigWigCorrelate(char *aFileName, char *bFileName)
/* bigWigCorrelate - Correlate bigWig files, optionally only on target regions.
 * The correlation is accumulated over the chromosomes listed in the first
 * file and printed to stdout.  When restrictFile is set, a range tree built
 * from it is passed along to restrict the comparison. */
{
struct genomeRangeTree *targetGrt = NULL;
if (restrictFile)
    targetGrt = grtFromBigBed(restrictFile);
struct bbiFile *aBbi = bigWigFileOpen(aFileName);
struct bbiFile *bBbi = bigWigFileOpen(bFileName);
struct correlate *c = correlateNew();
struct bbiChromInfo *chrom, *chromList = bbiChromList(aBbi);
struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew();
struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew();
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    addBwCorrelations(chrom, targetGrt, aVals, bVals, aBbi, bBbi, threshold, threshold, c);
    }
printf("%g\n", correlateResult(c));

/* Release everything acquired above; previously all of it was leaked. */
bigWigValsOnChromFree(&bVals);
bigWigValsOnChromFree(&aVals);
bbiChromInfoFreeList(&chromList);
correlateFree(&c);
bbiFileClose(&bBbi);
bbiFileClose(&aBbi);
/* The tree only exists when a restrict file was given. */
if (targetGrt != NULL)
    genomeRangeTreeFree(&targetGrt);
}
Ejemplo n.º 9
0
boolean bigWigSummaryArrayExtended(char *fileName, char *chrom, bits32 start, bits32 end,
	int summarySize, struct bbiSummaryElement *summary)
/* Open the bigWig at fileName, fill the summary array with extended summary
 * information for summarySize elements over chrom:start-end, and close the
 * file again.  Returns whatever bbiSummaryArrayExtended returns. */
{
boolean gotData;
struct bbiFile *bwf = bigWigFileOpen(fileName);

gotData = bbiSummaryArrayExtended(bwf, chrom, start, end,
	bigWigIntervalQuery, summarySize, summary);

bbiFileClose(&bwf);
return gotData;
}
Ejemplo n.º 10
0
boolean bigWigSummaryArray(char *fileName, char *chrom, bits32 start, bits32 end,
	enum bbiSummaryType summaryType, int summarySize, double *summaryValues)
/* Open the bigWig at fileName and fill summaryValues with summarySize values
 * of the requested summaryType over chrom:start-end, then close the file.
 * Callers should initialize summaryValues to a default (generally 0.0 or
 * nan("") depending on the application), since entries for regions without
 * data are left untouched.  Returns FALSE if no data at that position. */
{
boolean gotData;
struct bbiFile *bbi = bigWigFileOpen(fileName);

gotData = bbiSummaryArray(bbi, chrom, start, end,
	bigWigIntervalQuery, summaryType, summarySize, summaryValues);

bbiFileClose(&bbi);
return gotData;
}
Ejemplo n.º 11
0
void addWigsInFile(char *fileName, struct bbiFile **pList)
/* Treat each word that readAllWords returns for fileName as a bigWig file
 * name, open it, and append the open file to the end of *pList. */
{
char *buf = NULL;
char **words;
int count;
readAllWords(fileName, &words, &count, &buf);

int ix = 0;
while (ix < count)
    {
    struct bbiFile *bbi = bigWigFileOpen(words[ix]);
    slAddTail(pList, bbi);
    ++ix;
    }

/* Both the word array and the text buffer it points into are released. */
freeMem(words);
freeMem(buf);
}
Ejemplo n.º 12
0
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa)
/* Open the bigWig at fileOrUrl and wrap it in a new annoStreamBigWig, returned
 * as its annoStreamer base.  Rows are of type arWig. */
{
struct bbiFile *bbi = bigWigFileOpen(fileOrUrl);
struct asObject *asObj = asParseText(annoRowBigWigAsText);

struct annoStreamBigWig *self = NULL;
AllocVar(self);

/* Generic streamer setup, then plug in the bigWig-specific callbacks. */
struct annoStreamer *base = &(self->streamer);
annoStreamerInit(base, aa, asObj, fileOrUrl);
base->rowType = arWig;
base->setRegion = asbwSetRegion;
base->nextRow = asbwNextRow;
base->close = asbwClose;

/* The streamer keeps the open file along with its chromosome list. */
self->bbi = bbi;
self->chromList = bbiChromList(bbi);
return (struct annoStreamer *)self;
}
Ejemplo n.º 13
0
int bigWigOutRegion(char *table, struct sqlConnection *conn,
			     struct region *region, int maxOut,
			     enum wigOutputType wigOutType)
/* Write out bigWig for region, doing intersecting and filtering as need be.
 * Output goes to stdout.  Returns the number of items written, which is 0
 * when no bigWig file name can be found for table. */
{
/* Database first, then table, matching the anySubtrackMerge(database, curTable)
 * call elsewhere in this code; the two arguments used to be swapped here. */
boolean isMerged = anySubtrackMerge(database, table);
int resultCount = 0;
char *wigFileName = bigWigFileName(table, conn);
if (wigFileName)
    {
    struct bbiFile *bwf = bigWigFileOpen(wigFileName);
    if (bwf)
	{
	/* Easy case, just dump out data. */
	if (!anyFilter() && !anyIntersection() && !isMerged && wigOutType == wigOutData)
	    resultCount = bigWigIntervalDump(bwf, region->chrom, region->start, region->end,
		    maxOut, stdout);
	/* Pretty easy case, still do it ourselves. */
	else if (!isMerged && wigOutType == wigOutData)
	    {
	    double ll, ul;
	    enum wigCompare cmp;
	    getWigFilter(database, curTable, &cmp, &ll, &ul);
	    /* Intervals live in a local memory pool that is dropped after printing. */
	    struct lm *lm = lmInit(0);
	    struct bbiInterval *ivList, *iv;
	    ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
	    for (iv=ivList; iv != NULL && resultCount < maxOut; iv = iv->next, ++resultCount)
	        {
		fprintf(stdout, "%s\t%d\t%d\t%g\n", region->chrom, iv->start, iv->end, iv->val);
		}
	    lmCleanup(&lm);
	    }
	/* Harder cases - resort to making a data vector and letting that machinery handle it. */
	else
	    {
	    struct dataVector *dv = bigWigDataVector(table, conn, region);
	    resultCount = wigPrintDataVectorOut(dv, wigOutType, maxOut, NULL);
	    dataVectorFree(&dv);
	    }
	}
    bbiFileClose(&bwf);
    }
freeMem(wigFileName);
return resultCount;
}
Ejemplo n.º 14
0
static void bigWigClick(struct trackDb *tdb, char *fileName)
/* Print the details section for a bigWig track: the current window position
 * and size, followed by interval statistics, a "zoom in" hint, or a "no data"
 * note.  The file is only queried when there is no maxWindowToQuery setting
 * or the window is narrower than that setting. */
{
char *chrom = cartString(cart, "c");

struct bbiFile *bbi = NULL;
struct lm *lm = lmInit(0);
struct bbiInterval *ivList = NULL;

/* Optional limit on how wide a window we are willing to query. */
char *maxWinToQuery = trackDbSettingClosestToHome(tdb, "maxWindowToQuery");
unsigned maxWTQ = 0;
if (isNotEmpty(maxWinToQuery))
    maxWTQ = sqlUnsigned(maxWinToQuery);

if ((maxWinToQuery == NULL) || (maxWTQ > winEnd-winStart))
    {
    bbi = bigWigFileOpen(fileName);
    ivList = bigWigIntervalQuery(bbi, chrom, winStart, winEnd, lm);
    }

/* Position and width of the current view. */
char startBuf[64], endBuf[64], countBuf[64]; /* big enough for 2^64 (and then some) */
sprintLongWithCommas(startBuf, BASE_1(winStart));
sprintLongWithCommas(endBuf, winEnd);
printf("<B>Position: </B> %s:%s-%s<BR>\n", chrom, startBuf, endBuf );
sprintLongWithCommas(countBuf, winEnd-winStart);
printf("<B>Total Bases in view: </B> %s <BR>\n", countBuf);

if (ivList == NULL)
    {
    if ((bbi == NULL) && (maxWTQ <= winEnd-winStart))
        {
        /* The file was never opened because the window is too wide. */
        sprintLongWithCommas(countBuf, maxWTQ);
        printf("<P>Zoom in to a view less than %s bases to see data summary.</P>",countBuf);
        }
    else
        {
        printf("<P>No data overlapping current position.</P>");
        }
    }
else
    {
    bbiIntervalStatsReport(ivList, tdb->table, chrom, winStart, winEnd);
    }

lmCleanup(&lm);
bbiFileClose(&bbi);
}
Ejemplo n.º 15
0
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa)
/* Open the bigWig at fileOrUrl and wrap it in a new annoStreamBigWig, returned
 * as its annoStreamer base.  Rows are of type arWigVec. */
{
struct bbiFile *bbi = bigWigFileOpen(fileOrUrl);
struct asObject *asObj = annoStreamBigWigAsObject();

struct annoStreamBigWig *self = NULL;
AllocVar(self);

/* Generic streamer setup, then plug in the bigWig-specific callbacks. */
struct annoStreamer *base = &(self->streamer);
annoStreamerInit(base, aa, asObj, fileOrUrl);
//#*** Would be more memory-efficient to do arWigSingle for bedGraphs.
//#*** annoGrateWig would need to be updated to handle incoming arWigSingle.
base->rowType = arWigVec;
base->setRegion = asbwSetRegion;
base->nextRow = asbwNextRow;
base->close = asbwClose;

/* The streamer keeps the open file along with its chromosome list. */
self->bbi = bbi;
self->chromList = bbiChromList(bbi);
return (struct annoStreamer *)self;
}
Ejemplo n.º 16
0
struct bed *bigWigIntervalsToBed(struct sqlConnection *conn, char *table, struct region *region,
				 struct lm *lm)
/* Return a list of unfiltered, unintersected intervals in region as bed (for
 * secondary table in intersection).  The beds and the intervals they are built
 * from are allocated in lm; each bed's chrom points at region->chrom rather
 * than a copy. */
{
struct bed *bed, *bedList = NULL;
char *fileName = bigWigFileName(table, conn);
struct bbiFile *bwf = bigWigFileOpen(fileName);
struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, region->chrom, region->start,
						      region->end, lm);
for (iv = ivList;  iv != NULL;  iv = iv->next)
    {
    lmAllocVar(lm, bed);
    bed->chrom = region->chrom;
    bed->chromStart = iv->start;
    bed->chromEnd = iv->end;
    slAddHead(&bedList, bed);
    }
/* Built head-first, so flip back into query order. */
slReverse(&bedList);

/* The result lives entirely in lm, so the file and its name can be released
 * here; previously both were leaked on every call. */
bbiFileClose(&bwf);
freeMem(fileName);
return bedList;
}
Ejemplo n.º 17
0
void bigWigInfo(char *fileName)
/* bigWigInfo - Print out information about bigWig file.
 * With the "minMax" option only the minimum and maximum values are printed.
 * Otherwise header fields, zoom levels (listed in detail with "zooms"),
 * chromosome count (listed in detail with "chroms") and overall statistics
 * are printed. */
{
struct bbiFile *bwf = bigWigFileOpen(fileName);

if (optionExists("minMax"))
    {
    struct bbiSummaryElement sum = bbiTotalSummary(bwf);
    printf("%f %f\n", sum.minVal, sum.maxVal);
    /* Close on this early exit too; the handle used to be leaked. */
    bbiFileClose(&bwf);
    return;
    }

printf("version: %d\n", bwf->version);
printf("isCompressed: %s\n", (bwf->uncompressBufSize > 0 ? "yes" : "no"));
printf("isSwapped: %d\n", bwf->isSwapped);
printLabelAndLongNumber("primaryDataSize", bwf->unzoomedIndexOffset - bwf->unzoomedDataOffset);
if (bwf->levelList != NULL)
    {
    /* The primary index size is measured up to the first zoom level's data offset. */
    long long indexEnd = bwf->levelList->dataOffset;
    printLabelAndLongNumber("primaryIndexSize", indexEnd - bwf->unzoomedIndexOffset);
    }
printf("zoomLevels: %d\n", bwf->zoomLevels);
if (optionExists("zooms"))
    {
    struct bbiZoomLevel *zoom;
    for (zoom = bwf->levelList; zoom != NULL; zoom = zoom->next)
	printf("\t%d\t%d\n", zoom->reductionLevel, (int)(zoom->indexOffset - zoom->dataOffset));
    }
struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf);
printf("chromCount: %d\n", slCount(chromList));
if (optionExists("chroms"))
    for (chrom=chromList; chrom != NULL; chrom = chrom->next)
	printf("\t%s %d %d\n", chrom->name, chrom->id, chrom->size);
struct bbiSummaryElement sum = bbiTotalSummary(bwf);
printLabelAndLongNumber("basesCovered", sum.validCount);
printf("mean: %f\n", sum.sumData/sum.validCount);
printf("min: %f\n", sum.minVal);
printf("max: %f\n", sum.maxVal);
printf("std: %f\n", calcStdFromSums(sum.sumData, sum.sumSquares, sum.validCount));

/* Release the chromosome list and the file, neither of which was freed before. */
bbiChromInfoFreeList(&chromList);
bbiFileClose(&bwf);
}
Ejemplo n.º 18
0
/* --- .Call ENTRY POINT --- */
/* Return an integer vector holding the size of every chromosome listed in the
 * bigWig file r_filename, with the chromosome names as the vector's names. */
SEXP BWGFile_seqlengths(SEXP r_filename) {
  pushRHandlers();
  struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  struct bbiChromInfo *chromList = bbiChromList(file);
  struct bbiChromInfo *chrom = chromList;
  SEXP seqlengths, seqlengthNames;
  
  PROTECT(seqlengths = allocVector(INTSXP, slCount(chromList)));
  /* The names vector is attached to the protected result right away. */
  seqlengthNames = allocVector(STRSXP, length(seqlengths));
  setAttrib(seqlengths, R_NamesSymbol, seqlengthNames);
  
  for(int i = 0; i < length(seqlengths); i++) {
    INTEGER(seqlengths)[i] = chrom->size;
    SET_STRING_ELT(seqlengthNames, i, mkChar(chrom->name));
    chrom = chrom->next;
  }
  
  bbiChromInfoFreeList(&chromList);
  /* Close the file as the other entry points do; it used to be left open,
   * leaking a handle on every call. */
  bbiFileClose(&file);
  popRHandlers();
  UNPROTECT(1);
  return seqlengths;
}
Ejemplo n.º 19
0
bigWig_t * bigwig_load(const char * filename, const char * udc_dir) {
  bigWig_t * bigwig = NULL;
  struct errCatch * err;

  /* set cache */
  if (udc_dir != NULL)
    udcSetDefaultDir((char*) udc_dir);

  /* setup error management & try to open file */
  err = errCatchNew();
  if (errCatchStart(err))
    bigwig = bigWigFileOpen((char*)filename);
  errCatchEnd(err);
  if (err->gotError) {
    fprintf(stderr, "error: %s\n", err->message->string);
    errCatchFree(&err);
    return NULL;
  }
  errCatchFree(&err);

  return bigwig;
}
Ejemplo n.º 20
0
char *printBigWigViewInfo(FILE *f, char *indent, struct view *view, 
    struct composite *comp, struct taggedFile *tfList)
/* Print out info for a bigWig view, including subtracks.
 * Display limits are derived from the pooled statistics of every file in
 * tfList whose manifest outputType equals view->name: the upper limit is the
 * pooled mean plus six standard deviations.  Returns a freshly cloned copy of
 * the "bigWig <min> <max>" type string that was written. */
{
/* Look at all tracks in this view and calculate overall limits. */
double sumOfSums = 0, sumOfSumSquares = 0;
bits64 sumOfN = 0;
struct taggedFile *tf;
for (tf = tfList; tf != NULL; tf = tf->next)
    {
    if (sameString(tf->manifest->outputType, view->name))
	{
	struct bbiFile *bbi = bigWigFileOpen(tf->manifest->fileName);
	struct bbiSummaryElement sum = bbiTotalSummary(bbi);
	sumOfSums += sum.sumData;
	sumOfSumSquares += sum.sumSquares;
	/* Accumulate the count like the two sums.  It used to be overwritten,
	 * leaving only the last file's count and skewing mean and std. */
	sumOfN += sum.validCount;
	bigWigFileClose(&bbi);
	}
    }
double mean = sumOfSums/sumOfN;
double std = calcStdFromSums(sumOfSums, sumOfSumSquares, sumOfN);
double clipMax = mean + 6*std;

/* Output view stanza. */
char type[64];
safef(type, sizeof(type), "bigWig %g %g", 0.0, clipMax);
fprintf(f, "%stype %s\n", indent, type);
fprintf(f, "%sviewLimits 0:%g\n", indent, clipMax);
fprintf(f, "%sminLimit 0\n", indent);
fprintf(f, "%smaxLimit %g\n", indent, clipMax);
fprintf(f, "%sautoScale off\n", indent);
fprintf(f, "%smaxHeightPixels 100:32:16\n", indent);
fprintf(f, "%swindowingFunction mean+whiskers\n", indent);
return cloneString(type);
}
void bigWigToWig(char *inFile, char *outFile)
/* bigWigToWig - Convert bigWig to wig.  This will keep more of the same structure of the 
 * original wig than bigWigToBedGraph does, but still will break up large stepped sections into 
 * smaller ones. */
{
struct bbiFile *bwf = bigWigFileOpen(inFile);
FILE *f = mustOpen(outFile, "w");
struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf);
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    if (clChrom != NULL && !sameString(clChrom, chrom->name))
        continue;
    char *chromName = chrom->name;
    int start = 0, end = chrom->size;
    if (clStart > 0)
        start = clStart;
    if (clEnd > 0)
        end = clEnd;
    bigWigIntervalDump(bwf, chromName, start, end, 0, f);
    }
bbiChromInfoFreeList(&chromList);
carefulClose(&f);
bbiFileClose(&bwf);
}
Ejemplo n.º 22
0
/* This old way is ~3 times as slow */
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigWig file: accumulate, for every target,
 * its overlap with the file's intervals, then save one cdwQaEnrich record
 * per target to the database. */
{
/* Get path to bigWig, open it, and read all chromosomes. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct bbiChromInfo *chrom;
struct target *target;

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Work one chromosome at a time so only one chromosome worth of bigWig data
 * is in memory.  For each target the chromosome-specific range tree is looked
 * up once, outside the interval loop, to keep the inner loop cheap. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    struct lm *lm = lmInit(0);

    /* Fetch every interval on this chromosome, timing the query. */
    long queryStart = clock1000();
    struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom->name, 0, chrom->size, lm);
    totalBigQueryTime += clock1000() - queryStart;

    /* Add overlaps from ivList to every target, timing that too. */
    long overlapStart = clock1000();
    for (target = targetList; target != NULL; target = target->next)
        {
        struct rbTree *targetTree = genomeRangeTreeFindRangeTree(target->grt, chrom->name);
        if (targetTree == NULL)
            continue;
        struct bbiInterval *iv;
        for (iv = ivList; iv != NULL; iv = iv->next)
            {
            int overlap = rangeTreeOverlapSize(targetTree, iv->start, iv->end);
            target->uniqOverlapBases += overlap;
            target->overlapBases += overlap * iv->val;
            }
        }
    totalOverlapTime += clock1000() - overlapStart;

    lmCleanup(&lm);
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
for (target = targetList; target != NULL; target = target->next)
    {
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
        target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}
Ejemplo n.º 23
0
/* Open a bigWig file given a const file name.  The name is copied into a
 * local, writable buffer before being handed to bigWigFileOpen.
 * Returns NULL, without attempting the open, when fileName is NULL or does not
 * fit in the buffer with its terminator; previously such names overflowed the
 * stack buffer. */
struct bbiFile *I_bigWigFileOpen(const char *fileName)
{
    char c_fileName[256];
    size_t nameLen;

    if (fileName == NULL)
        return NULL;
    nameLen = strlen(fileName);
    if (nameLen >= sizeof(c_fileName))
        return NULL;
    /* Length is known to fit, so copy the name together with its terminator. */
    memcpy(c_fileName, fileName, nameLen + 1);
    return bigWigFileOpen(c_fileName);
}
Ejemplo n.º 24
0
/* --- .Call ENTRY POINT --- */
/* Query a bigWig file for the intervals overlapping a set of ranges.
 * r_ranges is walked as a list with one element per chromosome, named by
 * chromosome; each element supplies IRanges starts and widths.
 * When r_return_list is true the result is a SimpleList holding one numeric
 * vector per query range (length equal to the range width, zero where no
 * interval covers a base).  Otherwise the result is a RangedData with, per
 * chromosome, the start/width of every hit and, if r_return_score is true,
 * a "score" column with the hit values. */
SEXP BWGFile_query(SEXP r_filename, SEXP r_ranges, SEXP r_return_score, 
                   SEXP r_return_list) {
  pushRHandlers();
  struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  SEXP chromNames = getAttrib(r_ranges, R_NamesSymbol);
  int nchroms = length(r_ranges);
  Rboolean return_list = asLogical(r_return_list);
  SEXP rangesList, rangesListEls, dataFrameList, dataFrameListEls, ans;
  SEXP numericListEls;
  bool returnScore = asLogical(r_return_score);
  /* mkNamed reads names up to the empty string, so var_names yields one
   * element named "score" and var_names + 1 yields an empty list. */
  const char *var_names[] = { "score", "" };
  /* All query results are allocated from this pool and released at the end. */
  struct lm *lm = lmInit(0);
 
  struct bbiInterval *hits = NULL;
  struct bbiInterval *qhits = NULL;

  /* Allocate the top-level containers.  One object stays protected in list
   * mode and two otherwise; the UNPROTECT calls near the end account for them. */
  if (return_list) {
    int n_ranges = 0;
    for(int i = 0; i < nchroms; i++) {
      SEXP localRanges = VECTOR_ELT(r_ranges, i);
      n_ranges += get_IRanges_length(localRanges);
    }
    PROTECT(numericListEls = allocVector(VECSXP, n_ranges));
  } else {
    PROTECT(rangesListEls = allocVector(VECSXP, nchroms));
    setAttrib(rangesListEls, R_NamesSymbol, chromNames);
    PROTECT(dataFrameListEls = allocVector(VECSXP, nchroms));
    setAttrib(dataFrameListEls, R_NamesSymbol, chromNames);
  }

  /* Next free slot in numericListEls (list mode only). */
  int elt_len = 0;
  for (int i = 0; i < nchroms; i++) {
    SEXP localRanges = VECTOR_ELT(r_ranges, i);
    int nranges = get_IRanges_length(localRanges);
    int *start = INTEGER(get_IRanges_start(localRanges));
    int *width = INTEGER(get_IRanges_width(localRanges));
    for (int j = 0; j < nranges; j++) {
      /* start[j] is 1-based; the query is made with a 0-based start. */
      struct bbiInterval *queryHits =
        bigWigIntervalQuery(file, (char *)CHAR(STRING_ELT(chromNames, i)),
                            start[j] - 1, start[j] - 1 + width[j], lm);
      /* IntegerList */
      if (return_list) {
        qhits = queryHits;
        int nqhits = slCount(queryHits);
        SEXP ans_numeric;
        PROTECT(ans_numeric = allocVector(REALSXP, width[j]));
        /* Bases not covered by any interval stay zero. */
        memset(REAL(ans_numeric), 0, sizeof(double) * width[j]);
        for (int k = 0; k < nqhits; k++, qhits = qhits->next) {
          /* Index is the base's offset from the 0-based range start.
           * NOTE(review): this assumes returned intervals lie within the
           * queried range - confirm against bigWigIntervalQuery. */
          for (int l = qhits->start; l < qhits->end; l++)
            REAL(ans_numeric)[(l - start[j] + 1)] = qhits->val;
        }
        SET_VECTOR_ELT(numericListEls, elt_len, ans_numeric);
        elt_len++;
        UNPROTECT(1);
      }
      /* Prepend this range's hits reversed, so that the single slReverse
       * below puts all hits for the chromosome back in query order. */
      slReverse(&queryHits);
      hits = slCat(queryHits, hits);
    } 

    /* RangedData */
    if (!return_list) {
      int nhits = slCount(hits);
      slReverse(&hits);
      SEXP ans_start, ans_width, ans_score, ans_score_l;
      PROTECT(ans_start = allocVector(INTSXP, nhits));
      PROTECT(ans_width = allocVector(INTSXP, nhits));

      if (returnScore) {
        PROTECT(ans_score_l = mkNamed(VECSXP, var_names));
        ans_score = allocVector(REALSXP, nhits);
        SET_VECTOR_ELT(ans_score_l, 0, ans_score);
      } else {
        PROTECT(ans_score_l = mkNamed(VECSXP, var_names + 1));
      }

      /* Walking nhits nodes leaves hits NULL, so the next chromosome starts
       * with an empty list. */
      for (int j = 0; j < nhits; j++, hits = hits->next) {
        /* Convert the 0-based interval start back to 1-based. */
        INTEGER(ans_start)[j] = hits->start + 1;
        INTEGER(ans_width)[j] = hits->end - hits->start;
        if (returnScore)
          REAL(ans_score)[j] = hits->val;
      }
      SET_VECTOR_ELT(rangesListEls, i,
                     new_IRanges("IRanges", ans_start, ans_width, R_NilValue));
      SET_VECTOR_ELT(dataFrameListEls, i,
                     new_DataFrame("DataFrame", ans_score_l, R_NilValue,
                                   ScalarInteger(nhits)));
      /* ans_start, ans_width, ans_score_l */
      UNPROTECT(3);
    }
  }

  bbiFileClose(&file);

  if (return_list) {
    ans = new_SimpleList("SimpleList", numericListEls);
    /* numericListEls */
    UNPROTECT(1);
  } else { 
    PROTECT(dataFrameList =
            new_SimpleList("SimpleSplitDataFrameList", dataFrameListEls));
    PROTECT(rangesList = new_SimpleList("SimpleRangesList", rangesListEls));
    ans = new_RangedData("RangedData", rangesList, dataFrameList);
    /* rangesListEls, dataFrameListEls, dataFrameList, rangesList */
    UNPROTECT(4);
  }

  lmCleanup(&lm);
  popRHandlers();
  return ans;
}
Ejemplo n.º 25
0
void bigWigMerge(int inCount, char *inFiles[], char *outFile)
/* bigWigMerge - Merge together multiple bigWigs into a single one.
 * Values from all inputs are summed base by base, each input value first being
 * capped at clClip, and the result is written to outFile as bedGraph lines.
 * A run of equal values is written only if its value plus clAdjust is greater
 * than clThreshold.  When clInList is set, each element of inFiles names a
 * file listing bigWigs rather than a bigWig itself. */
{
/* Make a list of open bigWig files. */
struct bbiFile *inFile, *inFileList = NULL;
int i;
for (i=0; i<inCount; ++i)
    {
    if (clInList)
        {
	addWigsInFile(inFiles[i], &inFileList);
	}
    else
	{
	inFile = bigWigFileOpen(inFiles[i]);
	slAddTail(&inFileList, inFile);
	}
    }

FILE *f = mustOpen(outFile, "w");

struct bbiChromInfo *chrom, *chromList = getAllChroms(inFileList);
verbose(1, "Got %d chromosomes from %d bigWigs\nProcessing", 
	slCount(chromList), slCount(inFileList));
/* One buffer with a double per base, grown to the largest chromosome seen so far. */
double *mergeBuf = NULL;
int mergeBufSize = 0;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    struct lm *lm = lmInit(0);

    /* Make sure merge buffer is big enough. */
    int chromSize = chrom->size;
    verboseDot();
    verbose(2, "Processing %s (%d bases)\n", chrom->name, chromSize);
    if (chromSize > mergeBufSize)
        {
	mergeBufSize = chromSize;
	freeMem(mergeBuf);
	mergeBuf = needHugeMem(mergeBufSize * sizeof(double));
	}
    for (i=0; i<chromSize; ++i)
        mergeBuf[i] = 0.0;

    /* Loop through each input file grabbing data and merging it in. */
    for (inFile = inFileList; inFile != NULL; inFile = inFile->next)
        {
	struct bbiInterval *ivList = bigWigIntervalQuery(inFile, chrom->name, 0, chromSize, lm);
	verbose(3, "Got %d intervals in %s\n", slCount(ivList), inFile->fileName);
	struct bbiInterval *iv;
	for (iv = ivList; iv != NULL; iv = iv->next)
	    {
	    double val = iv->val;
	    if (val > clClip)
	        val = clClip;
	    int end = iv->end;
	    for (i=iv->start; i < end; ++i)
	         mergeBuf[i] += val;
	    }
	}


    /* Output each range of same values as a bedGraph item */
    int sameCount;
    for (i=0; i<chromSize; i += sameCount)
        {
	sameCount = doublesTheSame(mergeBuf+i, chromSize-i);
	double val = mergeBuf[i] + clAdjust;
	if (val > clThreshold)
	    fprintf(f, "%s\t%d\t%d\t%g\n", chrom->name, i, i + sameCount, val);
	}

    lmCleanup(&lm);
    }
verbose(1, "\n");

carefulClose(&f);

/* Release the merge buffer and close every input; neither was done before.
 * The next pointer is saved first because closing frees the node. */
freeMem(mergeBuf);
struct bbiFile *nextFile;
for (inFile = inFileList; inFile != NULL; inFile = nextFile)
    {
    nextFile = inFile->next;
    bbiFileClose(&inFile);
    }
}
Ejemplo n.º 26
0
void doSummaryStatsBigWig(struct sqlConnection *conn)
/* Put up page showing summary stats for bigWig track. */
{
struct trackDb *track = curTrack;
char *table = curTable;
char *shortLabel = (track == NULL ? table : track->shortLabel);
char *fileName = bigWigFileName(table, conn);
long startTime = clock1000();

htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table);

if (anySubtrackMerge(database, curTable))
    hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this "
	    "page (not implemented yet).  Statistics shown here are only for "
	    "the primary table %s (%s).</EM>", shortLabel, table);

struct bbiFile *bwf = bigWigFileOpen(fileName);
struct region *region, *regionList = getRegions();
double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0;
bits64 validCount = 0;

if (!anyFilter() && !anyIntersection())
    {
    for (region = regionList; region != NULL; region = region->next)
	{
	struct bbiSummaryElement sum;
	if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end,
		bigWigIntervalQuery, 1, &sum))
	    {
	    if (validCount == 0)
		{
		minVal = sum.minVal;
		maxVal = sum.maxVal;
		}
	    else
	        {
		if (sum.minVal < minVal)
		    minVal = sum.minVal;
		if (sum.maxVal > maxVal)
		    maxVal = sum.maxVal;
		}
	    sumData += sum.sumData;
	    sumSquares += sum.sumSquares;
	    validCount += sum.validCount;
	    }
	}
    }
else
    {
    double ll, ul;
    enum wigCompare cmp;
    getWigFilter(database, curTable, &cmp, &ll, &ul);
    for (region = regionList; region != NULL; region = region->next)
        {
	struct lm *lm = lmInit(0);
	struct bbiInterval *iv, *ivList;
	ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
	for (iv = ivList; iv != NULL; iv = iv->next)
	    {
	    double val = iv->val;
	    double size = iv->end - iv->start;
	    if (validCount == 0)
		minVal = maxVal = val;
	    else
	        {
		if (val < minVal)
		    minVal = val;
		if (val > maxVal)
		    maxVal = val;
		}
	    sumData += size*val;
	    sumSquares += size*val*val;
	    validCount += size;
	    }
	lmCleanup(&lm);
	}
    }

hTableStart();
floatStatRow("mean", sumData/validCount);
floatStatRow("min", minVal);
floatStatRow("max", maxVal);
floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount));
numberStatRow("bases with data", validCount);
long long regionSize = basesInRegion(regionList,0);
long long gapTotal = gapsInRegion(conn, regionList,0);
numberStatRow("bases with sequence", regionSize - gapTotal);
numberStatRow("bases in region", regionSize);
wigFilterStatRow(conn);
stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
long wigFetchTime = clock1000() - startTime;
floatStatRow("load and calc time", 0.001*wigFetchTime);
hTableEnd();

bbiFileClose(&bwf);
htmlClose();
}
Ejemplo n.º 27
0
static int bwqueryParseUnsigned(const char *text, unsigned int *retVal)
/* Parse text as a base-10, non-negative integer that fits in an unsigned int.
 * Returns 1 and stores the value in *retVal on success.  Returns 0 when text
 * holds no digits, has trailing characters, is negative, or is too large. */
{
char *tail = NULL;
long parsed = strtol(text, &tail, 10);
if (tail == text || tail[0] != '\0')
	return 0;
/* Test the sign while the value is still signed; once stored in an
 * unsigned variable a negative number is indistinguishable from a huge one. */
if (parsed < 0)
	return 0;
/* Reject values that would be truncated when narrowed to unsigned int. */
if ((long)(unsigned int)parsed != parsed)
	return 0;
*retVal = (unsigned int)parsed;
return 1;
}

int main(int argc, char *argv[])
/* bwquery - summarize a region of a bigWig file into a fixed number of
 * values and write them, one per line, to an output file.
 * Returns 0 on success, 1 on any usage or I/O error. */
{
/*
1. urlpath bigwig
2. chrom
3. start
4. stop
5. spnum
6. outfile
7. summeth
*/
if(argc!=8)
	{
	fputs("bwquery: wrong arg\n", stderr);
	return 1;
	}

unsigned int start=0, stop=0, spnum=0, summeth=0;
if(!bwqueryParseUnsigned(argv[3], &start))
	{
	fprintf(stderr, "bwquery: wrong start (%s)\n", argv[3]);
	return 1;
	}
if(!bwqueryParseUnsigned(argv[4], &stop) || stop<=start)
	{
	fprintf(stderr, "bwquery: wrong stop (%s)\n", argv[4]);
	return 1;
	}
if(!bwqueryParseUnsigned(argv[5], &spnum) || spnum==0)
	{
	fprintf(stderr, "bwquery: wrong spnum (%s)\n", argv[5]);
	return 1;
	}
if(!bwqueryParseUnsigned(argv[7], &summeth) || summeth<1)
	{
	fprintf(stderr, "bwquery: wrong summeth (%s)\n", argv[7]);
	return 1;
	}

/* calloc checks the count*size multiplication for overflow. */
double *data=calloc(spnum, sizeof *data);
if(data==NULL)
	{
	fputs("bwquery: out of mem\n", stderr);
	return 1;
	}

/* Explicitly store 0.0 as the default for bins the summary leaves untouched;
 * the index is unsigned to match spnum. */
unsigned int i;
for(i=0; i<spnum; i++) data[i]=0;

struct bbiFile *bwf = bigWigFileOpen(argv[1]);
if(bwf==NULL)
	{
	fprintf(stderr, "bwquery: no access to %s\n", argv[1]);
	free(data);
	return 1;
	}
/* summeth: 1 = mean, 2 = max, anything else = min.  The return value is
 * deliberately not checked: bins stay at their 0 default and are still
 * written out. */
bbiSummaryArray(bwf, argv[2], start, stop, (BbiFetchIntervals)bigWigIntervalQuery, 
	summeth==1?bbiSumMean:
	summeth==2?bbiSumMax:bbiSumMin,
	spnum, &data[0]);
bbiFileClose(&bwf);

FILE *fout=fopen(argv[6],"w");
if(fout==NULL)
	{
	fputs("bwquery: failed to open output file\n", stderr);
	free(data);
	return 1;
	}
for(i=0; i<spnum; i++)
	fprintf(fout, "%f\n", data[i]);
free(data);

/* fclose flushes buffered output, so a failure here means data was lost. */
if(fclose(fout)!=0)
	{
	fputs("bwquery: failed to write output file\n", stderr);
	return 1;
	}

return 0;
}
Ejemplo n.º 28
0
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigWig file.  For every non-skipped target
 * this adds to target->uniqOverlapBases the number of covered bases inside
 * the target's ranges and to target->overlapBases the sum of the bigWig values
 * at those bases, then saves one cdwQaEnrich row per target. */
{
/* Get path to bigWig, open it, and read all chromosomes. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
struct bigWigValsOnChrom *valsOnChrom = bigWigValsOnChromNew();

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigWig data in memory. Also just for performance we do a lookup of target range tree to
 * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    long startBigQueryTime = clock1000();
    boolean gotData = bigWigValsOnChromFetchData(valsOnChrom, chrom->name, bbi);
    long endBigQueryTime = clock1000();
    totalBigQueryTime += endBigQueryTime - startBigQueryTime;
    if (gotData)
	{
	double *valBuf = valsOnChrom->valBuf;
	Bits *covBuf = valsOnChrom->covBuf;

	/* Loop through all targets adding overlaps from the per-base
	 * value (valBuf) and coverage (covBuf) buffers. */
	long startOverlapTime = clock1000();
	struct target *target;
	for (target = targetList; target != NULL; target = target->next)
	    {
	    if (target->skip)
		continue;
	    struct genomeRangeTree *grt = target->grt;
	    struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	    if (targetTree != NULL)
		{
		struct range *range, *rangeList = rangeTreeList(targetTree);
		for (range = rangeList; range != NULL; range = range->next)
		    {
		    /* Ranges are zero-based, half-open: end is exclusive.
		     * Using i<=e would count one base too many per range and
		     * could index one past the buffers at the chromosome end. */
		    int s = range->start, e = range->end, i;
		    for (i=s; i<e; ++i)
		        {
			if (bitReadOne(covBuf, i))
			    {
			    double x = valBuf[i];
			    target->uniqOverlapBases += 1;
			    target->overlapBases += x;
			    }
			}
		    }
		}
	    }
	long endOverlapTime = clock1000();
	totalOverlapTime += endOverlapTime - startOverlapTime;
	}
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

/* Release everything allocated or opened by this function. */
bigWigValsOnChromFree(&valsOnChrom);
bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}