예제 #1
0
void addBigWigIntervalInfo(struct bbiFile *bbi, struct lm *lm, char *chrom, int start, int end,
    int *pSumSize, int *pSumCoverage, double *pSumVal)
/* Read in interval from bigBed and add it sums. */
{
struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bbi, chrom, start, end, lm);
*pSumSize += (end - start);
for (iv = ivList; iv != NULL; iv = iv->next)
    {
    int cov1 = rangeIntersection(iv->start, iv->end, start, end);
    if (cov1 > 0)
	{
	*pSumCoverage += cov1;
	*pSumVal += cov1 * iv->val;
	}
    }
}
예제 #2
0
boolean bigWigValsOnChromFetchData(struct bigWigValsOnChrom *chromVals, char *chrom, 
	struct bbiFile *bigWig)
/* Fetch data for chromosome from bigWig. Returns FALSE if not data on that chrom. */
{
/* Fetch chromosome and size into self. */
freeMem(chromVals->chrom);
chromVals->chrom = cloneString(chrom);
long chromSize = chromVals->chromSize = bbiChromSize(bigWig, chrom);

if (chromSize <= 0)
    return FALSE;

/* Make sure buffers are big enough. */
if (chromSize > chromVals->bufSize)
    {
    freeMem(chromVals->valBuf);
    freeMem(chromVals->covBuf);
    chromVals->valBuf = needHugeMem((sizeof(double))*chromSize);
    chromVals->covBuf = bitAlloc(chromSize);
    chromVals->bufSize = chromSize;
    }

/* Zero out buffers */
bitClear(chromVals->covBuf, chromSize);
double *valBuf = chromVals->valBuf;
int i;
for (i=0; i<chromSize; ++i)
    valBuf[i] = 0.0;

fetchIntoBuf(bigWig, chrom, 0, chromSize, chromVals);

#ifdef OLD
/* Fetch intervals for this chromosome and fold into buffers. */
struct lm *lm = lmInit(0);
struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bigWig, chrom, 0, chromSize, lm);
for (iv = ivList; iv != NULL; iv = iv->next)
    {
    double val = iv->val;
    int end = iv->end;
    for (i=iv->start; i<end; ++i)
	valBuf[i] = val;
    bitSetRange(chromVals->covBuf, iv->start, iv->end - iv->start);
    }
lmCleanup(&lm);
#endif /* OLD */
return TRUE;
}
예제 #3
0
파일: wiggleClick.c 프로젝트: bowhan/kent
static void bigWigClick(struct trackDb *tdb, char *fileName)
/* Display details for BigWig data tracks. */
{
char *chrom = cartString(cart, "c");

/* Open BigWig file and get interval list. */
struct bbiFile *bbi = NULL;
struct lm *lm = lmInit(0);
struct bbiInterval *bbList = NULL;
char *maxWinToQuery = trackDbSettingClosestToHome(tdb, "maxWindowToQuery");

unsigned maxWTQ = 0;
if (isNotEmpty(maxWinToQuery))
    maxWTQ = sqlUnsigned(maxWinToQuery);

if ((maxWinToQuery == NULL) || (maxWTQ > winEnd-winStart))
    {
    bbi = bigWigFileOpen(fileName);
    bbList = bigWigIntervalQuery(bbi, chrom, winStart, winEnd, lm);
    }

char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */
sprintLongWithCommas(num1Buf, BASE_1(winStart));
sprintLongWithCommas(num2Buf, winEnd);
printf("<B>Position: </B> %s:%s-%s<BR>\n", chrom, num1Buf, num2Buf );
sprintLongWithCommas(num1Buf, winEnd-winStart);
printf("<B>Total Bases in view: </B> %s <BR>\n", num1Buf);

if (bbList != NULL)
    {
    bbiIntervalStatsReport(bbList, tdb->table, chrom, winStart, winEnd);
    }
else if ((bbi == NULL) && (maxWTQ <= winEnd-winStart))
    {
    sprintLongWithCommas(num1Buf, maxWTQ);
    printf("<P>Zoom in to a view less than %s bases to see data summary.</P>",num1Buf);
    }
else
    {
    printf("<P>No data overlapping current position.</P>");
    }

lmCleanup(&lm);
bbiFileClose(&bbi);
}
예제 #4
0
파일: bigWig.c 프로젝트: davidhoover/kent
struct bed *bigWigIntervalsToBed(struct sqlConnection *conn, char *table, struct region *region,
				 struct lm *lm)
/* Return a list of unfiltered, unintersected intervals in region as bed (for
 * secondary table in intersection). */
{
struct bed *bed, *bedList = NULL;
char *fileName = bigWigFileName(table, conn);
struct bbiFile *bwf = bigWigFileOpen(fileName);
struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, region->chrom, region->start,
						      region->end, lm);
for (iv = ivList;  iv != NULL;  iv = iv->next)
    {
    lmAllocVar(lm, bed);
    bed->chrom = region->chrom;
    bed->chromStart = iv->start;
    bed->chromEnd = iv->end;
    slAddHead(&bedList, bed);
    }
slReverse(&bedList);
return bedList;
}
예제 #5
0
/* --- .Call ENTRY POINT --- */
SEXP BWGFile_query(SEXP r_filename, SEXP r_ranges, SEXP r_return_score, 
                   SEXP r_return_list) {
  pushRHandlers();
  struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename)));
  SEXP chromNames = getAttrib(r_ranges, R_NamesSymbol);
  int nchroms = length(r_ranges);
  Rboolean return_list = asLogical(r_return_list);
  SEXP rangesList, rangesListEls, dataFrameList, dataFrameListEls, ans;
  SEXP numericListEls;
  bool returnScore = asLogical(r_return_score);
  const char *var_names[] = { "score", "" };
  struct lm *lm = lmInit(0);
 
  struct bbiInterval *hits = NULL;
  struct bbiInterval *qhits = NULL;

  if (return_list) {
    int n_ranges = 0;
    for(int i = 0; i < nchroms; i++) {
      SEXP localRanges = VECTOR_ELT(r_ranges, i);
      n_ranges += get_IRanges_length(localRanges);
    }
    PROTECT(numericListEls = allocVector(VECSXP, n_ranges));
  } else {
    PROTECT(rangesListEls = allocVector(VECSXP, nchroms));
    setAttrib(rangesListEls, R_NamesSymbol, chromNames);
    PROTECT(dataFrameListEls = allocVector(VECSXP, nchroms));
    setAttrib(dataFrameListEls, R_NamesSymbol, chromNames);
  }

  int elt_len = 0;
  for (int i = 0; i < nchroms; i++) {
    SEXP localRanges = VECTOR_ELT(r_ranges, i);
    int nranges = get_IRanges_length(localRanges);
    int *start = INTEGER(get_IRanges_start(localRanges));
    int *width = INTEGER(get_IRanges_width(localRanges));
    for (int j = 0; j < nranges; j++) {
      struct bbiInterval *queryHits =
        bigWigIntervalQuery(file, (char *)CHAR(STRING_ELT(chromNames, i)),
                            start[j] - 1, start[j] - 1 + width[j], lm);
      /* IntegerList */
      if (return_list) {
        qhits = queryHits;
        int nqhits = slCount(queryHits);
        SEXP ans_numeric;
        PROTECT(ans_numeric = allocVector(REALSXP, width[j]));
        memset(REAL(ans_numeric), 0, sizeof(double) * width[j]);
        for (int k = 0; k < nqhits; k++, qhits = qhits->next) {
          for (int l = qhits->start; l < qhits->end; l++)
            REAL(ans_numeric)[(l - start[j] + 1)] = qhits->val;
        }
        SET_VECTOR_ELT(numericListEls, elt_len, ans_numeric);
        elt_len++;
        UNPROTECT(1);
      }
      slReverse(&queryHits);
      hits = slCat(queryHits, hits);
    } 

    /* RangedData */
    if (!return_list) {
      int nhits = slCount(hits);
      slReverse(&hits);
      SEXP ans_start, ans_width, ans_score, ans_score_l;
      PROTECT(ans_start = allocVector(INTSXP, nhits));
      PROTECT(ans_width = allocVector(INTSXP, nhits));

      if (returnScore) {
        PROTECT(ans_score_l = mkNamed(VECSXP, var_names));
        ans_score = allocVector(REALSXP, nhits);
        SET_VECTOR_ELT(ans_score_l, 0, ans_score);
      } else {
        PROTECT(ans_score_l = mkNamed(VECSXP, var_names + 1));
      }

      for (int j = 0; j < nhits; j++, hits = hits->next) {
        INTEGER(ans_start)[j] = hits->start + 1;
        INTEGER(ans_width)[j] = hits->end - hits->start;
        if (returnScore)
          REAL(ans_score)[j] = hits->val;
      }
      SET_VECTOR_ELT(rangesListEls, i,
                     new_IRanges("IRanges", ans_start, ans_width, R_NilValue));
      SET_VECTOR_ELT(dataFrameListEls, i,
                     new_DataFrame("DataFrame", ans_score_l, R_NilValue,
                                   ScalarInteger(nhits)));
      UNPROTECT(3);
    }
  }

  bbiFileClose(&file);

  if (return_list) {
    ans = new_SimpleList("SimpleList", numericListEls);
    UNPROTECT(1);
  } else { 
    PROTECT(dataFrameList =
            new_SimpleList("SimpleSplitDataFrameList", dataFrameListEls));
    PROTECT(rangesList = new_SimpleList("SimpleRangesList", rangesListEls));
    ans = new_RangedData("RangedData", rangesList, dataFrameList);
    UNPROTECT(4);
  }

  lmCleanup(&lm);
  popRHandlers();
  return ans;
}
예제 #6
0
/* This old way is ~3 times as slow */
void doEnrichmentsFromBigWig(struct sqlConnection *conn, 
    struct cdwFile *ef, struct cdwValidFile *vf, 
    struct cdwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from a bigBed file. */
{
/* Get path to bigBed, open it, and read all chromosomes. */
char *bigWigPath = cdwPathForFileId(conn, ef->id);
struct bbiFile *bbi = bigWigFileOpen(bigWigPath);
struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);

/* This takes a while, so let's figure out what parts take the time. */
long totalBigQueryTime = 0;
long totalOverlapTime = 0;

/* Do a pretty complex loop that just aims to set target->overlapBases and ->uniqOverlapBases
 * for all targets.  This is complicated by just wanting to keep one chromosome worth of
 * bigWig data in memory. Also just for performance we do a lookup of target range tree to
 * get chromosome specific one to use, which avoids a hash lookup in the inner loop. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    /* Get list of intervals in bigWig for this chromosome, and feed it to a rangeTree. */
    struct lm *lm = lmInit(0);
    long startBigQueryTime = clock1000();
    struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom->name, 0, chrom->size, lm);
    long endBigQueryTime = clock1000();
    totalBigQueryTime += endBigQueryTime - startBigQueryTime;
    struct bbiInterval *iv;

    /* Loop through all targets adding overlaps from ivList */
    long startOverlapTime = clock1000();
    struct target *target;
    for (target = targetList; target != NULL; target = target->next)
        {
	struct genomeRangeTree *grt = target->grt;
	struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	if (targetTree != NULL)
	    {
	    for (iv = ivList; iv != NULL; iv = iv->next)
		{
		int overlap = rangeTreeOverlapSize(targetTree, iv->start, iv->end);
		target->uniqOverlapBases += overlap;
		target->overlapBases += overlap * iv->val;
		}
	    }
	}
    long endOverlapTime = clock1000();
    totalOverlapTime += endOverlapTime - startOverlapTime;
    lmCleanup(&lm);
    }

verbose(1, "totalBig %0.3f, totalOverlap %0.3f\n", 0.001*totalBigQueryTime, 0.001*totalOverlapTime);

/* Now loop through targets and save enrichment info to database */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target, 
	target->overlapBases, target->uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

bbiChromInfoFreeList(&chromList);
bigWigFileClose(&bbi);
freez(&bigWigPath);
}
예제 #7
0
파일: bigWig.c 프로젝트: davidhoover/kent
struct bbiInterval *intersectedFilteredBbiIntervalsOnRegion(struct sqlConnection *conn,
	struct bbiFile *bwf, struct region *region, enum wigCompare filterCmp, double filterLl,
	double filterUl, struct lm *lm)
/* Get list of bbiIntervals (more-or-less bedGraph things from bigWig) out of bigWig file
 * and if necessary apply filter and intersection.  Return list which is allocated in lm. */
{
char *chrom = region->chrom;
int chromSize = hChromSize(database, chrom);
struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, region->start, region->end, lm);

/* Run filter if necessary */
if (filterCmp != wigNoOp_e)
    {
    struct bbiInterval *next, *newList = NULL;
    for (iv = ivList; iv != NULL; iv = next)
        {
	next = iv->next;
	if (wigCompareValFilter(iv->val, filterCmp, filterLl, filterUl))
	    {
	    slAddHead(&newList, iv);
	    }
	}
    slReverse(&newList);
    ivList = newList;
    }

/* Run intersection if necessary */
if (anyIntersection())
    {
    boolean isBpWise = intersectionIsBpWise();
    Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise);
    struct bbiInterval *next, *newList = NULL;
    double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0)*0.01;
    double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100)*0.01;
    char *op = cartString(cart, hgtaIntersectOp);
    for (iv = ivList; iv != NULL; iv = next)
	{
	next = iv->next;
	int start = iv->start;
	int size = iv->end - start;
	int overlap = bitCountRange(bits2, start, size);
	if (isBpWise)
	    {
	    if (overlap == size)
	        {
		slAddHead(&newList, iv);
		}
	    else if (overlap > 0)
	        {
		/* Here we have to break things up. */
		double val = iv->val;
		struct bbiInterval *partIv = iv;	// Reuse memory for first interval
		int s = iv->start, end = iv->end;
		for (;;)
		    {
		    s = bitFindSet(bits2, s, end);
		    if (s >= end)
		        break;
		    int bitsSet = bitFindClear(bits2, s, end) - s;
		    if (partIv == NULL)
			lmAllocVar(lm, partIv);
		    partIv->start = s;
		    partIv->end = s + bitsSet;
		    partIv->val = val;
		    slAddHead(&newList, partIv);
		    partIv = NULL;
		    s += bitsSet;
		    if (s >= end)
		        break;
		    }
		}
	    }
	else
	    {
	    double coverage = (double)overlap/size;
	    if (intersectOverlapFilter(op, moreThresh, lessThresh, coverage))
		{
		slAddHead(&newList, iv);
		}
	    }
	}
    slReverse(&newList);
    ivList = newList;
    bitFree(&bits2);
    }

return ivList;
}
예제 #8
0
void bigWigMerge(int inCount, char *inFiles[], char *outFile)
/* bigWigMerge - Merge together multiple bigWigs into a single one.. */
{
/* Make a list of open bigWig files. */
struct bbiFile *inFile, *inFileList = NULL;
int i;
for (i=0; i<inCount; ++i)
    {
    if (clInList)
        {
	addWigsInFile(inFiles[i], &inFileList);
	}
    else
	{
	inFile = bigWigFileOpen(inFiles[i]);
	slAddTail(&inFileList, inFile);
	}
    }

FILE *f = mustOpen(outFile, "w");

struct bbiChromInfo *chrom, *chromList = getAllChroms(inFileList);
verbose(1, "Got %d chromosomes from %d bigWigs\nProcessing", 
	slCount(chromList), slCount(inFileList));
double *mergeBuf = NULL;
int mergeBufSize = 0;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    struct lm *lm = lmInit(0);

    /* Make sure merge buffer is big enough. */
    int chromSize = chrom->size;
    verboseDot();
    verbose(2, "Processing %s (%d bases)\n", chrom->name, chromSize);
    if (chromSize > mergeBufSize)
        {
	mergeBufSize = chromSize;
	freeMem(mergeBuf);
	mergeBuf = needHugeMem(mergeBufSize * sizeof(double));
	}
    int i;
    for (i=0; i<chromSize; ++i)
        mergeBuf[i] = 0.0;

    /* Loop through each input file grabbing data and merging it in. */
    for (inFile = inFileList; inFile != NULL; inFile = inFile->next)
        {
	struct bbiInterval *ivList = bigWigIntervalQuery(inFile, chrom->name, 0, chromSize, lm);
	verbose(3, "Got %d intervals in %s\n", slCount(ivList), inFile->fileName);
	struct bbiInterval *iv;
	for (iv = ivList; iv != NULL; iv = iv->next)
	    {
	    double val = iv->val;
	    if (val > clClip)
	        val = clClip;
	    int end = iv->end;
	    for (i=iv->start; i < end; ++i)
	         mergeBuf[i] += val;
	    }
	}


    /* Output each range of same values as a bedGraph item */
    int sameCount;
    for (i=0; i<chromSize; i += sameCount)
        {
	sameCount = doublesTheSame(mergeBuf+i, chromSize-i);
	double val = mergeBuf[i] + clAdjust;
	if (val > clThreshold)
	    fprintf(f, "%s\t%d\t%d\t%g\n", chrom->name, i, i + sameCount, val);
	}

    lmCleanup(&lm);
    }
verbose(1, "\n");

carefulClose(&f);
}