void setPslBits(struct lineFile *lf, Bits *bits, struct psl *psl, int winStart, int winEnd)
/* Mark in bits every target-side block of psl.  When the second strand
 * character is '-', block coordinates are first flipped around psl->tSize.
 * A block starting before winStart or ending after winEnd is reported
 * through outOfRange().  Bit positions are the block coordinates themselves
 * (not offset by winStart). */
{
int blockCount = psl->blockCount;
boolean targetIsRev = (psl->strand[1] == '-');
int blockIx;

for (blockIx = 0; blockIx < blockCount; ++blockIx)
    {
    int start = psl->tStarts[blockIx];
    int end = start + psl->blockSizes[blockIx];
    int width;

    if (targetIsRev)
        {
        /* Mirror the block around the target size. */
        int flippedStart = psl->tSize - end;
        end = psl->tSize - start;
        start = flippedStart;
        }

    /* Complain about anything sticking out of the window. */
    if (start < winStart)
        outOfRange(lf, psl->tName, psl->tSize);
    if (end > winEnd)
        outOfRange(lf, psl->tName, psl->tSize);

    /* Only non-empty blocks are set. */
    width = end - start;
    if (width > 0)
        bitSetRange(bits, start, width);
    }
}
void fbOrBed(Bits *acc, char *track, char *chrom, int chromSize)
/* Or into acc the intervals of a three-column (chrom, start, end) file whose
 * first column equals chrom.  The file name comes from chromFileName(); if
 * that file does not exist nothing is done.  A negative start or an end
 * beyond chromSize is reported through outOfRange(). */
{
char fileName[512];
char *row[3];
struct lineFile *lf;

chromFileName(track, chrom, fileName);
if (!fileExists(fileName))
    return;

lf = lineFileOpen(fileName, TRUE);
while (lineFileRow(lf, row))
    {
    int start, end, width;

    /* Rows for other sequences are skipped. */
    if (!sameString(row[0], chrom))
        continue;

    start = lineFileNeedNum(lf, row, 1);
    if (start < 0)
        outOfRange(lf, chrom, chromSize);
    end = lineFileNeedNum(lf, row, 2);
    if (end > chromSize)
        outOfRange(lf, chrom, chromSize);

    width = end - start;
    if (width > 0)
        bitSetRange(acc, start, width);
    }
lineFileClose(&lf);
}
struct twoBit *slurpInput(char *inName, struct hash *tbHash, struct hash *bitmapHash) /* Read .2bit file inName into memory and return list of twoBit items. * Populate tbHash with twoBit items, and bitmapHash with bitmaps for * easy masking. Both are hashed by twoBit sequence name. */ { struct twoBit *twoBitList = NULL; struct twoBit *twoBit = NULL; twoBitList = twoBitFromFile(inName); /* Free and clear the masking data (unless -add). Hash twoBits by name. */ for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next) { Bits *bits = bitAlloc(twoBit->size); if (add) { /* Store the currently masked bits: */ int i; for (i = 0; i < twoBit->maskBlockCount; i++) { bitSetRange(bits, twoBit->maskStarts[i], twoBit->maskSizes[i]); } } /* Free the current representation of masking -- it will be replaced. */ twoBit->maskBlockCount = 0; freez(&(twoBit->maskStarts)); freez(&(twoBit->maskSizes)); /* Hash twoBit and our new bitmap by sequence name. */ hashAddUnique(tbHash, twoBit->name, twoBit); hashAddUnique(bitmapHash, twoBit->name, bits); } return twoBitList; }
Bits *getMaskedBits(struct sqlConnection *conn, struct chromInfo *chrom)
/* Return a bit array of chrom->size bits (obtained from bitAlloc) in which
 * the genoStart..genoEnd range of every row of table <chrom->chrom>_rmsk
 * whose repClass is "Simple_repeat" or "Low_complexity" is set.
 * Prints the total row count and the simple/low-complexity count to stdout. */
{
char query[512];
char **row;
struct sqlResult *sr;
struct rmskOut ro;
Bits *b = bitAlloc(chrom->size);
int allCount = 0;
int simpCount = 0;

sqlSafef(query, sizeof query, "select * from %s_rmsk", chrom->chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    ++allCount;
    rmskOutStaticLoad(row, &ro);
    if (sameString(ro.repClass, "Simple_repeat") || sameString(ro.repClass, "Low_complexity"))
        {
        ++simpCount;
        /* Sanity check only: compiled out when NDEBUG is defined. */
        assert(ro.genoEnd <= chrom->size);
        bitSetRange(b, ro.genoStart, ro.genoEnd - ro.genoStart);
        }
    }
printf("Got %d repeats, %d simple/low complexity\n", allCount, simpCount);
sqlFreeResult(&sr);
return b;
}
void setOverlapBits(int s, int e, int overlapStart, int overlapEnd, Bits *bits)
/* Set the bits for the intersection of s-e with overlapStart-overlapEnd.
 * Bit 0 of bits corresponds to overlapStart.  Nothing is set when the two
 * ranges do not intersect. */
{
/* Clip s-e to the overlap window. */
if (s < overlapStart)
    s = overlapStart;
if (e > overlapEnd)
    e = overlapEnd;

if (s >= e)
    return;
bitSetRange(bits, s - overlapStart, e - s);
}
void setWithPad(struct chrom *chrom, int s, int e)
/* Set bits in chrom->bits from s to e, widened on both sides by 'pad'
 * (defined outside this function) and clipped to 0..chrom->size.
 * Nothing is set if the clipped range is empty. */
{
s -= pad;
if (s < 0)
    s = 0;
e += pad;
if (e > chrom->size)
    e = chrom->size;
/* After clipping the range can be empty or inverted (e.g. s beyond
 * chrom->size); never hand a non-positive width to bitSetRange. */
if (e > s)
    bitSetRange(chrom->bits, s, e - s);
}
void addMasking(struct hash *bitmapHash, char *seqName, unsigned start, unsigned end) /* Set bits in range. */ { if (end > start) { Bits *bits = (Bits *)hashMustFindVal(bitmapHash, seqName); bitSetRange(bits, start, (end - start)); } }
static void bitSetClippedRange(Bits *bits, int bitSize, int s, int e)
/* Set bits s..e after clamping s up to 0 and e down to bitSize.
 * Does nothing when the clamped range is empty. */
{
int clippedStart = (s < 0) ? 0 : s;
int clippedEnd = (e > bitSize) ? bitSize : e;
int width = clippedEnd - clippedStart;

if (width <= 0)
    return;
bitSetRange(bits, clippedStart, width);
}
void addMasking(struct hash *twoBitHash, struct hash *bitmapHash, char *seqName, unsigned start, unsigned end) /* Set bits in range. */ { if (end > start) { struct twoBit *tb = (struct twoBit *)hashMustFindVal(twoBitHash, seqName); if ((end > tb->size) || (start >= tb->size)) errAbort("bed range (%d - %d) is off the end of chromosome %s size %d", start, end, seqName, tb->size); Bits *bits = (Bits *)hashMustFindVal(bitmapHash, seqName); bitSetRange(bits, start, (end - start)); } }
boolean bigWigValsOnChromFetchData(struct bigWigValsOnChrom *chromVals, char *chrom, struct bbiFile *bigWig) /* Fetch data for chromosome from bigWig. Returns FALSE if not data on that chrom. */ { /* Fetch chromosome and size into self. */ freeMem(chromVals->chrom); chromVals->chrom = cloneString(chrom); long chromSize = chromVals->chromSize = bbiChromSize(bigWig, chrom); if (chromSize <= 0) return FALSE; /* Make sure buffers are big enough. */ if (chromSize > chromVals->bufSize) { freeMem(chromVals->valBuf); freeMem(chromVals->covBuf); chromVals->valBuf = needHugeMem((sizeof(double))*chromSize); chromVals->covBuf = bitAlloc(chromSize); chromVals->bufSize = chromSize; } /* Zero out buffers */ bitClear(chromVals->covBuf, chromSize); double *valBuf = chromVals->valBuf; int i; for (i=0; i<chromSize; ++i) valBuf[i] = 0.0; fetchIntoBuf(bigWig, chrom, 0, chromSize, chromVals); #ifdef OLD /* Fetch intervals for this chromosome and fold into buffers. */ struct lm *lm = lmInit(0); struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bigWig, chrom, 0, chromSize, lm); for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; int end = iv->end; for (i=iv->start; i<end; ++i) valBuf[i] = val; bitSetRange(chromVals->covBuf, iv->start, iv->end - iv->start); } lmCleanup(&lm); #endif /* OLD */ return TRUE; }
void bitsForOut(char *fileName, int seqSize, Bits *bits)
/* Set in bits the ranges listed in RepeatMasker .out file fileName.
 * The file must start with a three line header (first line beginning with
 * "SW" after leading spaces, second line beginning with "score").  In each
 * following line, word 5 is the one-based start and word 6 the end of the
 * range.  On the first data line, word 7 must be a parenthesized number and
 * end plus that number must equal seqSize.  Any malformed line, or a range
 * outside 0..seqSize, is a fatal error (errAbort). */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line, *words[8];
int wordCount;
boolean firstTime = TRUE;
int start,end;

/* Check and skip over three line header */
if (!lineFileNext(lf, &line, NULL))
    errAbort("%s is empty", fileName);
line = skipLeadingSpaces(line);
if (!startsWith("SW", line))
    errAbort("%s is not a RepeatMasker .out file", fileName);
lineFileNext(lf, &line, NULL);
if (!startsWith("score", line))
    errAbort("%s is not a RepeatMasker .out file", fileName);
lineFileNext(lf, &line, NULL);

while (lineFileNext(lf, &line, NULL))
    {
    wordCount = chopLine(line, words);
    if (wordCount < 8)
        errAbort("Short line %d of %s\n", lf->lineIx, lf->fileName);
    /* Convert the one-based start to zero-based. */
    start = lineFileNeedNum(lf, words, 5) - 1;
    end = lineFileNeedNum(lf, words, 6);
    if (start > end)
        errAbort("Start after end line %d of %s", lf->lineIx, lf->fileName);
    if (firstTime)
        {
        /* Cross-check the caller's seqSize against the first data line. */
        char *s = words[7];
        /* Cast keeps the isdigit argument in unsigned char range. */
        if (s[0] != '(' || !isdigit((unsigned char)s[1]))
            errAbort("Expected parenthesized number line %d of %s", lf->lineIx, lf->fileName);
        if (seqSize != end + atoi(s+1))
            errAbort("Size mismatch line %d of %s", lf->lineIx, lf->fileName);
        firstTime = FALSE;
        }
    if (end > seqSize)
        errAbort("End past bounds line %d of %s", lf->lineIx, lf->fileName);
    /* A start field of 0 or less would give a negative bit index. */
    if (start < 0)
        errAbort("Start before beginning of sequence line %d of %s", lf->lineIx, lf->fileName);
    bitSetRange(bits, start, end-start);
    }
lineFileClose(&lf);
}
static void covAddRange(struct covStats *cov, int start, int end)
/* Fold the range start-end into cov.  Ranges that do not intersect
 * cov->region are ignored.  Otherwise the part inside the region is set in
 * cov->bits (bit 0 = region start) and added to sumBases, itemCount is
 * bumped, and minBases/maxBases are updated from the unclipped length. */
{
struct region *reg = cov->region;
int fullSize, clippedSize;

/* No intersection with the region: nothing to record. */
if (end <= reg->start || start >= reg->end)
    return;

fullSize = end - start;

/* Trim to the region before touching the bitmap. */
if (start < reg->start)
    start = reg->start;
if (end > reg->end)
    end = reg->end;
clippedSize = end - start;

bitSetRange(cov->bits, start - reg->start, clippedSize);
cov->sumBases += clippedSize;
cov->itemCount += 1;

/* Extremes are tracked on the original, unclipped length. */
if (fullSize > cov->maxBases)
    cov->maxBases = fullSize;
if (fullSize < cov->minBases)
    cov->minBases = fullSize;
}
static struct visiMatch *visiSearcherAdd(struct visiSearcher *searcher, int imageId, double weight, int startWord, int wordCount) /* Add given weight to match involving imageId, creating * a fresh match if necessary for imageId. */ { struct visiMatch key, *match; key.imageId = imageId; match = rbTreeFind(searcher->tree, &key); if (match == NULL) { match = visiMatchNew(imageId, searcher->wordCount); slAddHead(&searcher->matchList, match); rbTreeAdd(searcher->tree, match); } match->weight += weight; assert(startWord + wordCount <= searcher->wordCount); bitSetRange(match->wordBits, startWord, wordCount); return match; }
void fbOrChain(Bits *acc, char *track, char *chrom, int chromSize)
/* Or into acc the target-side blocks (tStart..tEnd) of every chain in a
 * chain file.  The file name comes from chromFileName(); if that file does
 * not exist nothing is done.  A negative start or an end beyond chromSize
 * is reported through outOfRange(). */
{
struct lineFile *lf;
char fileName[512];
struct chain *chain;
struct cBlock *b;

chromFileName(track, chrom, fileName);
if (!fileExists(fileName))
    return;
lf = lineFileOpen(fileName, TRUE);
while ((chain = chainRead(lf)) != NULL)
    {
    for (b = chain->blockList; b != NULL; b = b->next)
        {
        int s = b->tStart, e = b->tEnd;
        int w;
        if (s < 0)
            outOfRange(lf, chrom, chromSize);
        if (e > chromSize)
            outOfRange(lf, chrom, chromSize);
        /* Skip empty blocks rather than pass a non-positive width on. */
        w = e - s;
        if (w > 0)
            bitSetRange(acc, s, w);
        }
    chainFree(&chain);
    }
/* Release the file opened above; it was previously left open. */
lineFileClose(&lf);
}
static void fetchIntoBuf(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, struct bigWigValsOnChrom *chromVals)
/* Read the bigWig data blocks that bbiOverlappingBlocks() reports for
 * chrom:start-end and fold them into chromVals: for every item the covered
 * positions are set in chromVals->covBuf and the item's value is stored at
 * each of those positions in chromVals->valBuf.  Returns nothing.
 * Aborts (errAbort) if bwf is not a bigWig file.  Item coordinates are used
 * as buffer indexes without any bounds check in this function. */
{
/* A lot of code duplicated with bigWigIntervalQuery, but here the clipping
 * is simplified since always working across full chromosome, and the output is
 * different.   Since both of these are in inner loops and speed critical, it's hard
 * to factor out without perhaps making it worse than the bit of duplication. */
if (bwf->typeSig != bigWigSig)
   errAbort("Trying to do fetchIntoBuf on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir, chrom, start, end, NULL);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;
Bits *covBuf = chromVals->covBuf;
double *valBuf = chromVals->valBuf;

/* Set up for uncompression optionally: a scratch buffer is only allocated
 * when the file declares a non-zero uncompress buffer size. */
char *uncompressBuf = NULL;
if (bwf->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bwf->uncompressBufSize);

/* This loop is a little complicated because we merge the read requests for efficiency, but we
 * have to then go back through the data one unmerged block at a time.  The outer loop has no
 * increment of its own; 'block' is advanced by the inner loop up to afterGap. */
for (block = blockList; block != NULL; )
    {
    /* Find contiguous blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
        {
        /* Uncompress if necessary.  Either way blockPt..blockEnd delimits
         * the bytes of this block that are parsed below. */
        char *blockPt, *blockEnd;
        if (uncompressBuf)
            {
            blockPt = uncompressBuf;
            int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
            blockEnd = blockPt + uncSize;
            }
        else
            {
            blockPt = blockBuf;
            blockEnd = blockPt + block->size;
            }

        /* Deal with insides of block.  Every memRead* call below is handed
         * &blockPt, so the order of the reads defines the record layout. */
        struct bwgSectionHead head;
        bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
        switch (head.type)
            {
            case bwgTypeBedGraph:
                {
                /* Each item: start, end, value. */
                for (i=0; i<head.itemCount; ++i)
                    {
                    bits32 s = memReadBits32(&blockPt, isSwapped);
                    bits32 e = memReadBits32(&blockPt, isSwapped);
                    bitSetRange(covBuf, s, e-s);
                    val = memReadFloat(&blockPt, isSwapped);
                    bits32 j;
                    for (j=s; j<e; ++j)
                        valBuf[j] = val;
                    }
                break;
                }
            case bwgTypeVariableStep:
                {
                /* Each item: start, value; the width is head.itemSpan. */
                for (i=0; i<head.itemCount; ++i)
                    {
                    bits32 s = memReadBits32(&blockPt, isSwapped);
                    val = memReadFloat(&blockPt, isSwapped);
                    bitSetRange(covBuf, s, head.itemSpan);
                    bits32 e = s + head.itemSpan;
                    bits32 j;
                    for (j=s; j<e; ++j)
                        valBuf[j] = val;
                    }
                break;
                }
            case bwgTypeFixedStep:
                {
                /* Do a little optimization for the most common and worst case - step1/span1 */
                if (head.itemStep == 1 && head.itemSpan == 1)
                    {
                    /* One value per position from head.start to head.end. */
                    bits32 s = head.start;
                    bits32 e = head.end;
                    bitSetRange(covBuf, s, e-s);
                    bits32 j;
                    for (j=s; j<e; ++j)
                        valBuf[j] = memReadFloat(&blockPt, isSwapped);
                    }
                else
                    {
                    /* Each item is a single value covering itemSpan
                     * positions; the window s..e moves by itemStep. */
                    bits32 s = head.start;
                    bits32 e = s + head.itemSpan;
                    for (i=0; i<head.itemCount; ++i)
                        {
                        bitSetRange(covBuf, s, head.itemSpan);
                        val = memReadFloat(&blockPt, isSwapped);
                        bits32 j;
                        for (j=s; j<e; ++j)
                            valBuf[j] = val;
                        s += head.itemStep;
                        e += head.itemStep;
                        }
                    }
                break;
                }
            default:
                internalErr();
                break;
            }
        /* The parse must have consumed the block exactly. */
        assert(blockPt == blockEnd);
        /* Step by the stored size of this block inside mergedBuf. */
        blockBuf += block->size;
        }
    freeMem(mergedBuf);
    }
freeMem(uncompressBuf);
slFreeList(&blockList);
}