void bitmapToMaskArray(struct hash *bitmapHash, struct hash *tbHash) /* Translate each bitmap in bitmapHash into an array of mask coordinates * in the corresponding twoBit in tbHash. Assume tbHash's mask array is * empty at the start -- we allocate it here. Free bitmap when done. */ { struct hashCookie cookie = hashFirst(tbHash); struct hashEl *hel = NULL; while ((hel = hashNext(&cookie)) != NULL) { char *seqName = hel->name; struct twoBit *tb = (struct twoBit *)(hel->val); struct hashEl *bHel = hashLookup(bitmapHash, seqName); Bits *bits; unsigned start=0, end=0; assert(tb != NULL); assert(tb->maskBlockCount == 0); if (bHel == NULL) errAbort("Missing bitmap for seq \"%s\"", seqName); bits = (Bits *)bHel->val; if (bits != NULL) { struct lm *lm = lmInit(0); struct unsignedRange *rangeList = NULL, *range = NULL; int i; for (;;) { start = bitFindSet(bits, end, tb->size); if (start >= tb->size) break; end = bitFindClear(bits, start, tb->size); if (end > start) { lmAllocVar(lm, range); range->start = start; range->size = (end - start); slAddHead(&rangeList, range); } } slReverse(&rangeList); tb->maskBlockCount = slCount(rangeList); if (tb->maskBlockCount > 0) { AllocArray(tb->maskStarts, tb->maskBlockCount); AllocArray(tb->maskSizes, tb->maskBlockCount); for (i = 0, range = rangeList; range != NULL; i++, range = range->next) { tb->maskStarts[i] = range->start; tb->maskSizes[i] = range->size; } } lmCleanup(&lm); bitFree(&bits); bHel->val = NULL; } } }
static struct bed *bitsToBed4List(Bits *bits, int bitSize, char *chrom, int minSize, int rangeStart, int rangeEnd, struct lm *lm)
/* Translate ranges of set bits to bed 4 items.
 *   bits       - bitmap to scan
 *   bitSize    - number of bits in the bitmap; rangeEnd is clamped to this
 *   chrom      - stored by pointer in every bed (not copied) and used as the
 *                prefix of each item name
 *   minSize    - runs shorter than this many bits are skipped
 *   rangeStart - first bit to consider; negative values are clamped to 0
 *   rangeEnd   - one past the last bit to consider
 *   lm         - local memory pool that owns the returned beds and names
 * Returns the beds in ascending position order, or NULL if no run qualifies.
 * Items are named "<chrom>.<n>" with n counting from 1 over the runs that
 * are actually emitted.  Names are formatted through a 128-byte buffer, so a
 * very long chrom name would be truncated. */
{
    struct bed *bedList = NULL, *bed;
    int start = 0;
    int end = 0;
    int id = 0;
    char name[128];

    if (rangeStart < 0)
        rangeStart = 0;
    if (rangeEnd > bitSize)
        rangeEnd = bitSize;
    end = rangeStart;

    /* We depend on extra zero BYTE at end in case bitNot was used on bits. */
    for (;;)
    {
        start = bitFindSet(bits, end, rangeEnd);
        if (start >= rangeEnd)
            break;
        end = bitFindClear(bits, start, rangeEnd);
        if (end - start >= minSize)
        {
            lmAllocVar(lm, bed);
            bed->chrom = chrom;
            bed->chromStart = start;
            bed->chromEnd = end;
            snprintf(name, sizeof(name), "%s.%d", chrom, ++id);
            bed->name = lmCloneString(lm, name);
            slAddHead(&bedList, bed);
        }
    }
    /* Items were pushed on the head, so flip back to ascending order. */
    slReverse(&bedList);
    return(bedList);
}
void splitByGap(char *inName, int pieceSize, char *outRoot, long long estSize) /* Split up file into pieces at most pieceSize bases long, at gap boundaries * if possible. */ { off_t pieces = (estSize + pieceSize-1)/pieceSize; int digits = digitsBaseTen(pieces); int minGapSize = optionInt("minGapSize", 1000); boolean noGapDrops = optionExists("noGapDrops"); int maxN = optionInt("maxN", pieceSize-1); boolean oneFile = optionExists("oneFile"); char fileName[512]; char dirOnly[256], noPath[128]; int pos, pieceIx = 0, writeCount = 0; struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = NULL; Bits *bits = NULL; int seqCount = 0; char *outFile = optionVal("out", NULL); char *liftFile = optionVal("lift", NULL); FILE *lift = NULL; ZeroVar(&seq); if (minGapSize < 1) errAbort("ERROR: minGapSize must be > 0"); splitPath(outRoot, dirOnly, noPath, NULL); if (oneFile) { sprintf(fileName, "%s.fa", outRoot); f = mustOpen(fileName, "w"); } else fileName[0] = '\0'; if (liftFile) lift = mustOpen(liftFile, "w"); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { bits = bitAlloc(seq.size); setBitsN(seq.dna, seq.size, bits); ++seqCount; if (outFile != NULL) { if (seqCount > 1) errAbort("Can only handle in files with one sequence using out option"); bitsForOut(outFile, seq.size, bits); } pos = 0; while (pos < seq.size) { boolean gotGap = FALSE; int gapStart = 0; int gapSize = 0; int endSize = seq.size - pos; int thisSize = min(endSize, pieceSize); int startGapLen = 0; if (seq.dna[pos] == 'n' || seq.dna[pos] == 'N') { startGapLen = bitFindClear(bits, pos, endSize) - pos; verbose(3,"#\tstarting gap at %d for length: %d\n", pos, startGapLen ); } /* if a block is all gap for longer than minGapSize, then * keep it all together in one large piece */ if (startGapLen > minGapSize) { if (noGapDrops) { writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, startGapLen, &seq, lift, &writeCount, fileName); } else verbose(3,"#\tbeginning gap of %d 
size skipped\n", startGapLen); thisSize = startGapLen; } else if (thisSize > 0 && bitCountRange(bits, pos, thisSize) <= maxN) { if (endSize>pieceSize) /* otherwise chops tiny piece at very end */ { gotGap = findLastGap(&(seq.dna[pos]), thisSize, endSize, minGapSize, &gapStart, &gapSize); if (gotGap) thisSize = gapStart; } writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, thisSize, &seq, lift, &writeCount, fileName); } pos += thisSize; if (gotGap) { /* last block is all gap, write it all out */ /*if ((pos + gapSize) >= seq.size)*/ if (noGapDrops) { writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, gapSize, &seq ,lift, &writeCount, fileName); verbose(3, "#\tadding gapSize %d to pos %d -> %d and writing gap\n", gapSize, pos, pos+gapSize); } else verbose(3,"#\tadding gapSize %d to pos %d -> %d\n", gapSize, pos, pos+gapSize); pos += gapSize; } } bitFree(&bits); } carefulClose(&f); carefulClose(&lift); lineFileClose(&lf); printf("%d pieces of %d written\n", writeCount, pieceIx); }
struct bbiInterval *intersectedFilteredBbiIntervalsOnRegion(struct sqlConnection *conn, struct bbiFile *bwf, struct region *region, enum wigCompare filterCmp, double filterLl, double filterUl, struct lm *lm) /* Get list of bbiIntervals (more-or-less bedGraph things from bigWig) out of bigWig file * and if necessary apply filter and intersection. Return list which is allocated in lm. */ { char *chrom = region->chrom; int chromSize = hChromSize(database, chrom); struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, region->start, region->end, lm); /* Run filter if necessary */ if (filterCmp != wigNoOp_e) { struct bbiInterval *next, *newList = NULL; for (iv = ivList; iv != NULL; iv = next) { next = iv->next; if (wigCompareValFilter(iv->val, filterCmp, filterLl, filterUl)) { slAddHead(&newList, iv); } } slReverse(&newList); ivList = newList; } /* Run intersection if necessary */ if (anyIntersection()) { boolean isBpWise = intersectionIsBpWise(); Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise); struct bbiInterval *next, *newList = NULL; double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0)*0.01; double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100)*0.01; char *op = cartString(cart, hgtaIntersectOp); for (iv = ivList; iv != NULL; iv = next) { next = iv->next; int start = iv->start; int size = iv->end - start; int overlap = bitCountRange(bits2, start, size); if (isBpWise) { if (overlap == size) { slAddHead(&newList, iv); } else if (overlap > 0) { /* Here we have to break things up. 
*/ double val = iv->val; struct bbiInterval *partIv = iv; // Reuse memory for first interval int s = iv->start, end = iv->end; for (;;) { s = bitFindSet(bits2, s, end); if (s >= end) break; int bitsSet = bitFindClear(bits2, s, end) - s; if (partIv == NULL) lmAllocVar(lm, partIv); partIv->start = s; partIv->end = s + bitsSet; partIv->val = val; slAddHead(&newList, partIv); partIv = NULL; s += bitsSet; if (s >= end) break; } } } else { double coverage = (double)overlap/size; if (intersectOverlapFilter(op, moreThresh, lessThresh, coverage)) { slAddHead(&newList, iv); } } } slReverse(&newList); ivList = newList; bitFree(&bits2); } return ivList; }