Exemplo n.º 1
0
void bitmapToMaskArray(struct hash *bitmapHash, struct hash *tbHash)
/* For every twoBit stored in tbHash, look up the bitmap of the same name in
 * bitmapHash and convert its runs of set bits into the twoBit's
 * maskStarts/maskSizes arrays, which are allocated here (the twoBit must
 * start out with maskBlockCount == 0).  A sequence with no entry in
 * bitmapHash is a fatal error; an entry whose value is NULL is skipped.
 * Each bitmap is freed and its hash value set to NULL once converted. */
{
    struct hashCookie cookie = hashFirst(tbHash);
    struct hashEl *tbEl;

    for (tbEl = hashNext(&cookie); tbEl != NULL; tbEl = hashNext(&cookie))
    {
        struct twoBit *tb = (struct twoBit *)(tbEl->val);
        struct hashEl *bitmapEl = hashLookup(bitmapHash, tbEl->name);
        struct lm *lm;
        struct unsignedRange *runList = NULL, *run = NULL;
        Bits *bits;
        unsigned runStart, runEnd;
        int ix;

        assert(tb != NULL);
        assert(tb->maskBlockCount == 0);
        if (bitmapEl == NULL)
            errAbort("Missing bitmap for seq \"%s\"", tbEl->name);
        bits = (Bits *)bitmapEl->val;
        if (bits == NULL)
            continue;

        /* Collect each run of set bits as a (start, size) pair.  The
         * temporary list lives in local memory that is released below. */
        lm = lmInit(0);
        runStart = bitFindSet(bits, 0, tb->size);
        while (runStart < tb->size)
        {
            runEnd = bitFindClear(bits, runStart, tb->size);
            if (runEnd > runStart)
            {
                lmAllocVar(lm, run);
                run->start = runStart;
                run->size = (runEnd - runStart);
                slAddHead(&runList, run);
            }
            runStart = bitFindSet(bits, runEnd, tb->size);
        }
        /* Runs were pushed on the head, so flip back to ascending order. */
        slReverse(&runList);

        /* Copy the list into the twoBit's parallel arrays. */
        tb->maskBlockCount = slCount(runList);
        if (tb->maskBlockCount > 0)
        {
            AllocArray(tb->maskStarts, tb->maskBlockCount);
            AllocArray(tb->maskSizes, tb->maskBlockCount);
            ix = 0;
            run = runList;
            while (run != NULL)
            {
                tb->maskStarts[ix] = run->start;
                tb->maskSizes[ix] = run->size;
                ix++;
                run = run->next;
            }
        }

        lmCleanup(&lm);
        bitFree(&bits);
        bitmapEl->val = NULL;
    }
}
static struct bed *bitsToBed4List(Bits *bits, int bitSize, 
	char *chrom, int minSize, int rangeStart, int rangeEnd,
	struct lm *lm)
/* Translate ranges of set bits to bed 4 items.
 *   bits       - bit array to scan
 *   bitSize    - number of bits in the array; rangeEnd is clamped to this
 *   chrom      - stored by pointer (not copied) in every returned item and
 *                used as the prefix of each item name
 *   minSize    - runs of set bits shorter than this are not reported
 *   rangeStart - first bit to examine; negative values are treated as 0
 *   rangeEnd   - one past the last bit to examine
 *   lm         - local memory pool the items and their names are allocated in
 * Returns the items in ascending position order, named "<chrom>.<n>" with n
 * counting the reported items from 1, or NULL when no run qualifies. */
{
struct bed *bedList = NULL, *bed;
int start = 0;
int end = 0;
int id = 0;
char name[128];	/* names longer than 127 characters are truncated by snprintf */

if (rangeStart < 0)
    rangeStart = 0;
if (rangeEnd > bitSize)
    rangeEnd = bitSize;
end = rangeStart;

/* We depend on extra zero BYTE at end in case bitNot was used on bits. */
for (;;)
    {
    /* Each pass finds the next run of set bits as [start, end). */
    start = bitFindSet(bits, end, rangeEnd);
    if (start >= rangeEnd)
        break;
    end = bitFindClear(bits, start, rangeEnd);
    if (end - start >= minSize)
	{
	lmAllocVar(lm, bed);
	bed->chrom = chrom;
	bed->chromStart = start;
	bed->chromEnd = end;
	snprintf(name, sizeof(name), "%s.%d", chrom, ++id);
	bed->name = lmCloneString(lm, name);
	slAddHead(&bedList, bed);
	}
    }
/* Items were pushed on the head of the list; restore scan order. */
slReverse(&bedList);
return(bedList);
}
void splitByGap(char *inName, int pieceSize, char *outRoot, long long estSize)
/* Split up file into pieces at most pieceSize bases long, at gap boundaries 
 * if possible. */
{
off_t pieces = (estSize + pieceSize-1)/pieceSize;
int digits = digitsBaseTen(pieces);
int minGapSize = optionInt("minGapSize", 1000);
boolean noGapDrops = optionExists("noGapDrops");
int maxN = optionInt("maxN", pieceSize-1);
boolean oneFile = optionExists("oneFile");
char fileName[512];
char dirOnly[256], noPath[128];
int pos, pieceIx = 0, writeCount = 0;
struct dnaSeq seq;
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = NULL;
Bits *bits = NULL;
int seqCount = 0;
char *outFile = optionVal("out", NULL);
char *liftFile = optionVal("lift", NULL);
FILE *lift = NULL;
ZeroVar(&seq);

if (minGapSize < 1)
    errAbort("ERROR: minGapSize must be > 0");

splitPath(outRoot, dirOnly, noPath, NULL);
if (oneFile)
    {
    sprintf(fileName, "%s.fa", outRoot);
    f = mustOpen(fileName, "w");
    }
else
    fileName[0] = '\0';
if (liftFile)
    lift = mustOpen(liftFile, "w");

while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    bits = bitAlloc(seq.size);
    setBitsN(seq.dna, seq.size, bits);
    ++seqCount;
    if (outFile != NULL)
        {
	if (seqCount > 1)
	    errAbort("Can only handle in files with one sequence using out option");
	bitsForOut(outFile, seq.size, bits);
	}
    pos = 0;
    while (pos < seq.size)
        {
	boolean gotGap = FALSE;
	int gapStart = 0;
	int gapSize  = 0;
	int endSize  = seq.size - pos;
	int thisSize = min(endSize, pieceSize);
	int startGapLen = 0;

	if (seq.dna[pos] == 'n' || seq.dna[pos] == 'N')
	    {
	    startGapLen = bitFindClear(bits, pos, endSize) - pos;
	    verbose(3,"#\tstarting gap at %d for length: %d\n", pos,
		startGapLen );
	    }
	/*	if a block is all gap for longer than minGapSize, then
 	 *	keep it all together in one large piece
	 */
	if (startGapLen > minGapSize)
	    {
	    if (noGapDrops)
		{
		writeOneByGap(oneFile, outRoot, digits, &pieceIx,
		    f, noPath, pos, startGapLen, &seq, lift,
			&writeCount, fileName);
		}
	    else
		verbose(3,"#\tbeginning gap of %d size skipped\n", startGapLen);
	    thisSize = startGapLen;
	    }
	else if (thisSize > 0 && bitCountRange(bits, pos, thisSize) <= maxN)
	    {
	    if (endSize>pieceSize) /* otherwise chops tiny piece at very end */
	      {
		gotGap = findLastGap(&(seq.dna[pos]), thisSize, endSize,
				     minGapSize, &gapStart, &gapSize);
		if (gotGap)
		  thisSize = gapStart;
	      }
	    writeOneByGap(oneFile, outRoot, digits, &pieceIx,
		f, noPath, pos, thisSize, &seq, lift, &writeCount, fileName);
	    }
	pos += thisSize;
	if (gotGap)
	    {
	    /*	last block is all gap, write it all out	*/
	    /*if ((pos + gapSize) >= seq.size)*/
	    if (noGapDrops)
		{
		writeOneByGap(oneFile, outRoot, digits, &pieceIx,
		    f, noPath, pos, gapSize, &seq ,lift, &writeCount, fileName);
		verbose(3,
		    "#\tadding gapSize %d to pos %d -> %d and writing gap\n",
			gapSize, pos, pos+gapSize);
		}
	    else
		verbose(3,"#\tadding gapSize %d to pos %d -> %d\n",
			gapSize, pos, pos+gapSize);
	    pos += gapSize;
	    }
	}
    bitFree(&bits);
    }
carefulClose(&f);
carefulClose(&lift);
lineFileClose(&lf);
printf("%d pieces of %d written\n", writeCount, pieceIx);
}
Exemplo n.º 4
0
struct bbiInterval *intersectedFilteredBbiIntervalsOnRegion(struct sqlConnection *conn,
	struct bbiFile *bwf, struct region *region, enum wigCompare filterCmp, double filterLl,
	double filterUl, struct lm *lm)
/* Query the bigWig file for the intervals covering region, then optionally
 * narrow the result in two stages:
 *   1) when filterCmp is not wigNoOp_e, keep only intervals whose value
 *      passes wigCompareValFilter with limits filterLl/filterUl;
 *   2) when an intersection is active, keep or trim intervals according to
 *      the bitmap of the intersecting table.
 * The returned list, including any intervals created by trimming, is
 * allocated in lm. */
{
char *chrom = region->chrom;
int chromSize = hChromSize(database, chrom);
struct bbiInterval *ivList = bigWigIntervalQuery(bwf, chrom, region->start, region->end, lm);
struct bbiInterval *cur, *following;

/* Stage 1: value filter. */
if (filterCmp != wigNoOp_e)
    {
    struct bbiInterval *kept = NULL;
    cur = ivList;
    while (cur != NULL)
        {
	following = cur->next;	/* slAddHead rewrites cur->next */
	if (wigCompareValFilter(cur->val, filterCmp, filterLl, filterUl))
	    slAddHead(&kept, cur);
	cur = following;
	}
    slReverse(&kept);
    ivList = kept;
    }

/* Stage 2: intersection with another table. */
if (anyIntersection())
    {
    boolean isBpWise = intersectionIsBpWise();
    Bits *mask = bitsForIntersectingTable(conn, region, chromSize, isBpWise);
    struct bbiInterval *kept = NULL;
    /* Thresholds are scaled by 0.01 before being compared to coverage. */
    double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0)*0.01;
    double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100)*0.01;
    char *op = cartString(cart, hgtaIntersectOp);

    cur = ivList;
    while (cur != NULL)
	{
	following = cur->next;
	int ivStart = cur->start;
	int size = cur->end - ivStart;
	int overlap = bitCountRange(mask, ivStart, size);

	if (!isBpWise)
	    {
	    /* Whole-interval mode: keep or drop based on covered fraction. */
	    double coverage = (double)overlap/size;
	    if (intersectOverlapFilter(op, moreThresh, lessThresh, coverage))
		slAddHead(&kept, cur);
	    }
	else if (overlap == size)
	    {
	    /* Entirely covered: keep unchanged. */
	    slAddHead(&kept, cur);
	    }
	else if (overlap > 0)
	    {
	    /* Partially covered: emit one interval per run of set bits.
	     * The first run recycles cur's own storage; later runs get
	     * fresh intervals from lm.  Value and end are saved first
	     * because cur's fields are overwritten. */
	    double val = cur->val;
	    int end = cur->end;
	    struct bbiInterval *piece = cur;
	    int s = bitFindSet(mask, ivStart, end);
	    while (s < end)
		{
		int runEnd = bitFindClear(mask, s, end);
		if (piece == NULL)
		    lmAllocVar(lm, piece);
		piece->start = s;
		piece->end = runEnd;
		piece->val = val;
		slAddHead(&kept, piece);
		piece = NULL;
		if (runEnd >= end)
		    break;
		s = bitFindSet(mask, runEnd, end);
		}
	    }
	/* bp-wise with no overlap at all: interval is dropped. */
	cur = following;
	}
    slReverse(&kept);
    ivList = kept;
    bitFree(&mask);
    }

return ivList;
}