void intersectOnChrom(char *db, struct sqlConnection *conn, char *chrom, char *track1, char *track2) /* Do intersection on one chromosome. */ { int chromSize = hChromSize(chrom); struct lm *lm = lmInit(0); struct bed *bedList1, *bedList2, *andBed; struct featureBits *fb1, *fb2; Bits *bit1, *bit2; int fieldCount1, fieldCount2; struct binKeeper *bk2; uglyTime(NULL); scanChromTable(conn, chrom, track1); scanChromTable(conn, chrom, track2); uglyTime("Scan tracks"); bedList1 = getChromAsBed(conn, db, track1, chrom, lm, &fieldCount1); bedList2 = getChromAsBed(conn, db, track2, chrom, lm, &fieldCount2); uglyTime("Tracks as bed"); uglyf("%d items with %d fields in %s, ", slCount(bedList1), fieldCount1, track1); uglyf("%d items with %d fields in %s\n", slCount(bedList2), fieldCount2, track2); bit1 = bitAlloc(chromSize+8); bit2 = bitAlloc(chromSize+8); uglyTime("bitAlloc"); fb1 = fbList(db, chrom, track1, bedList1, chromSize); fb2 = fbList(db, chrom, track1, bedList1, chromSize); uglyTime("bed to featureBits list"); fbOrBits(bit1, chromSize, fb1, 0); fbOrBits(bit2, chromSize, fb2, 0); uglyTime("or into bits"); bitAnd(bit1, bit2, chromSize); uglyTime("Anding bitfields"); andBed = bitsToBed4List(bit1, chromSize, chrom, 0, 0, chromSize, lm); uglyTime("Converting bitfield to bed 4"); bitCountAllOverlaps(bedList1, bit2, fieldCount2); uglyTime("Counting overlaps in track1 with bitfield of track2"); bk2 = fbToBinKeeper(fb2, chromSize); uglyTime("Adding featureBits list from track 2 into binKeeper."); bkCountAllOverlaps(bedList1, bk2, fieldCount2); uglyTime("Count overlaps in track1 with binKeeper of track2"); featureBitsFreeList(&fb1); featureBitsFreeList(&fb2); uglyTime("free featureBits"); bitFree(&bit1); bitFree(&bit2); uglyTime("bitFree"); }
void whyConserved(char *database, char *chrom, char *homologyTrack)
/* whyConserved - Try and analyse why a particular thing is conserved.
 * Loads the homology track for chrom into a bitmap, prints a table
 * header, then calls explainSome once for the homology track itself and
 * once for each of a fixed list of track specifications, in order. */
{
/* Track specifications passed to explainSome, in the order applied. */
static char *trackSpecs[] =
    {
    "simpleRepeat",
    "rmsk",
    "sanger22:CDS:10",
    "refGene:CDS:10",
    "sanger22:exon:10",
    "refGene:exon:10",
    "ensGene:exon:10",
    "rnaGene",
    "mrna:exon:10",
    "intronEst:exon:10",
    "xenoMrna:exon:10",
    "xenoEst:exon:10",
    "genscan:exon:10",
    "genscanSubopt",
    "psu:exon:10",
    "sanger22:upstream:200",
    "refGene:upstream:200",
    "mrna:upstream:200",
    "est",
    };
int specCount = (int)(sizeof(trackSpecs) / sizeof(trackSpecs[0]));
struct sqlConnection *conn;
int chromSize;
Bits *homologyBits = NULL;
Bits *bits = NULL;
Bits *once = NULL;
int i;

hSetDb(database);
conn = hAllocConn();
chromSize = hChromSize(chrom);
homologyBits = bitAlloc(chromSize);
bits = bitAlloc(chromSize);
once = bitAlloc(chromSize);

/* Get homology bitmap and set once mask to be the same. */
fbOrTableBits(homologyBits, homologyTrack, chrom, chromSize, conn);
bitOr(once, homologyBits, chromSize);

/* printHeader */
printf("%-21s %8s %8s %5s %6s %6s %5s %5s \n",
    "Track Specification", "track", "overlap", "track", "mus", "track", "new", "cum");
printf("%-21s %8s %8s %5s %6s %6s %5s %5s \n",
    "", "size", "size", "geno", "track", "mus", "mus", "mus");
printf("-----------------------------------------------------------------------------\n");

/* Whittle away at homology: first the homology track itself, then each
 * track specification in turn. */
explainSome(homologyBits, once, bits, chrom, chromSize, conn, NULL, homologyTrack);
for (i = 0; i < specCount; ++i)
    explainSome(homologyBits, once, bits, chrom, chromSize, conn, trackSpecs[i], NULL);

/* Release the bitmaps allocated above. */
bitFree(&homologyBits);
bitFree(&bits);
bitFree(&once);
hFreeConn(&conn);
}
Bits *bitsForIntersectingTable(struct sqlConnection *conn, struct region *region,
	int chromSize, boolean isBpWise)
/* Return a newly allocated bitmap covering the items of the table we are
 * intersecting with, inside region.  Which table to use, and whether to
 * invert the bitmap, comes from CGI/cart variables. */
{
boolean invert = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
char *otherTable = cartString(cart, hgtaIntersectTable);
struct hTableInfo *otherHti = getHti(database, otherTable, conn);
struct lm *scratch = lmInit(64*1024);
Bits *mask = bitAlloc(chromSize+8);
/* For non-bigWig tables: we should go straight to raw beds here, not
 * through the routines that do filter & intersections, because the
 * secondary table has no filter and sure shouldn't be intersected. */
struct bed *otherBeds = isBigWigTable(otherTable)
    ? bigWigIntervalsToBed(conn, otherTable, region, scratch)
    : getFilteredBeds(conn, otherTable, region, scratch, NULL);

if (!isBpWise)
    expandZeroSize(otherBeds, otherHti->hasBlocks, chromSize);
bedOrBits(mask, chromSize, otherBeds, otherHti->hasBlocks, 0);
if (invert)
    bitNot(mask, chromSize);

/* The bed list lives in the scratch pool and is not needed once it has
 * been folded into the bitmap. */
lmCleanup(&scratch);
return mask;
}
Bits *getMaskedBits(struct sqlConnection *conn, struct chromInfo *chrom)
/* Return a bitmap of chrom->size bits with bits set over every row of the
 * <chrom>_rmsk table whose repClass is "Simple_repeat" or
 * "Low_complexity".  Prints the total and simple/low-complexity row
 * counts to stdout.  The bitmap is allocated here and returned without
 * being freed. */
{
char query[512];
char **row;
struct sqlResult *sr;
struct rmskOut ro;
Bits *b = bitAlloc(chrom->size);
int allCount = 0;
int simpCount = 0;

sqlSafef(query, sizeof query, "select * from %s_rmsk", chrom->chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    ++allCount;
    rmskOutStaticLoad(row, &ro);
    if (sameString(ro.repClass, "Simple_repeat") || sameString(ro.repClass, "Low_complexity"))
	{
	++simpCount;
	/* The range must not run past the end of the bitmap. */
	assert(ro.genoEnd <= chrom->size);
	bitSetRange(b, ro.genoStart, ro.genoEnd - ro.genoStart);
	}
    }
printf("Got %d repeats, %d simple/low complexity\n", allCount, simpCount);
sqlFreeResult(&sr);
return b;
}
boolean cutBetween(struct segment *a, struct segment *b,
	int overlapStart, int overlapEnd, int overlapSize,
	boolean splitGene, int crossover)
/* Try and cut out redundant parts where a and b overlap.
 * A negative crossover means none was supplied, in which case one is
 * searched for using a bitmap filled in from the genes (honouring
 * splitGene) and suboptimal features of both segments.  Returns TRUE
 * and trims a and b at the crossover if a non-negative crossover is
 * available, FALSE otherwise. */
{
if (crossover < 0)
    {
    Bits *occupied = bitAlloc(overlapSize);
    genesToBits(a->geneList, occupied, overlapStart, overlapEnd, splitGene);
    genesToBits(b->geneList, occupied, overlapStart, overlapEnd, splitGene);
    featuresToBits(a->suboptList, occupied, overlapStart, overlapEnd);
    featuresToBits(b->suboptList, occupied, overlapStart, overlapEnd);
    crossover = findCrossover(occupied, overlapStart, overlapEnd);
    bitFree(&occupied);
    }

/* Still no usable crossover point: leave both segments alone. */
if (crossover < 0)
    return FALSE;

removeOutside(0, crossover, a);
removeOutside(crossover, BIGNUM, b);
return TRUE;
}
struct twoBit *slurpInput(char *inName, struct hash *tbHash, struct hash *bitmapHash) /* Read .2bit file inName into memory and return list of twoBit items. * Populate tbHash with twoBit items, and bitmapHash with bitmaps for * easy masking. Both are hashed by twoBit sequence name. */ { struct twoBit *twoBitList = NULL; struct twoBit *twoBit = NULL; twoBitList = twoBitFromFile(inName); /* Free and clear the masking data (unless -add). Hash twoBits by name. */ for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next) { Bits *bits = bitAlloc(twoBit->size); if (add) { /* Store the currently masked bits: */ int i; for (i = 0; i < twoBit->maskBlockCount; i++) { bitSetRange(bits, twoBit->maskStarts[i], twoBit->maskSizes[i]); } } /* Free the current representation of masking -- it will be replaced. */ twoBit->maskBlockCount = 0; freez(&(twoBit->maskStarts)); freez(&(twoBit->maskSizes)); /* Hash twoBit and our new bitmap by sequence name. */ hashAddUnique(tbHash, twoBit->name, twoBit); hashAddUnique(bitmapHash, twoBit->name, bits); } return twoBitList; }
struct visiMatch *visiMatchNew(int imageId, int wordCount)
/* Allocate a visiMatch for imageId with a wordBits bitmap of wordCount
 * bits.  No weight is assigned here. */
{
struct visiMatch *vm;

AllocVar(vm);
vm->wordBits = bitAlloc(wordCount);
vm->imageId = imageId;
return vm;
}
void trackOverlap(char *database, char *chrom, char *homologyTrack, char *specFile) /* trackOverlap - Correlate a track with a series of tracks specified in specFile. */ { struct lineFile *lf = NULL; char *line = NULL; struct sqlConnection *conn; int chromSize; Bits *h**o = NULL; Bits *bits = NULL; Bits *once = NULL; lf = lineFileOpen(specFile, TRUE); conn = hAllocConn(database); chromSize = hChromSize(database, chrom); h**o = bitAlloc(chromSize); bits = bitAlloc(chromSize); once = bitAlloc(chromSize); /* Get homology bitmap and set once mask to be the same. */ fbOrTableBits(database, h**o, homologyTrack, chrom, chromSize, conn); bitOr(once, h**o, chromSize); /* printHeader */ printf("%-21s %8s %8s %5s %6s %6s %5s %5s \n", "Track Specification", "track", "overlap", "track", "cov", "track", "new", "cum"); printf("%-21s %8s %8s %5s %6s %6s %5s %5s \n", "", "size", "size", "geno", "track", "cov", "cov", "cov"); printf("-----------------------------------------------------------------------------\n"); /* Whittle awway at homology... */ explainSome(database, h**o, once, bits, chrom, chromSize, conn, NULL, homologyTrack); while(lineFileNextReal(lf, &line)) { explainSome(database, h**o, once, bits, chrom, chromSize, conn, line, NULL); } lineFileClose(&lf); hFreeConn(&conn); }
struct covStats *covStatsNew(struct region *region)
/* Allocate a covStats for region with minBases started at BIGNUM.
 * A bitmap spanning region->start to region->end is allocated only
 * when region is non-NULL. */
{
struct covStats *stats;

AllocVar(stats);
stats->minBases = BIGNUM;
stats->region = region;
if (region == NULL)
    return stats;

stats->bits = bitAlloc(region->end - region->start);
return stats;
}
void statsOnSpan(char *database, struct sqlConnection *conn, struct region *r, char *axtBestDir, struct stats *stats, FILE *f, struct scoredWindow **pWinList) /* Gather region info on one chromosome/region. */ { char *chrom = r->chrom; int chromSize = hChromSize(database, chrom); Bits *maskBits = bitAlloc(chromSize); Bits *aliBits = bitAlloc(chromSize); Bits *matchBits = bitAlloc(chromSize); Bits *geneBits = bitAlloc(chromSize); /* Set up aliBits and matchBits for to be turned on * where bases align, and where bases align and match. * Zero both bitmaps in areas that are transcribed. */ setAliBits(axtBestDir, chrom, chromSize, aliBits, matchBits); maskFeatures(database, conn, chrom, chromSize, maskBits); bitNot(maskBits, chromSize); bitAnd(aliBits, maskBits, chromSize); bitAnd(matchBits, maskBits, chromSize); /* Set up maskBits to have 0's on gaps in genome */ bitClear(maskBits, chromSize); fbOrTableBits(database, maskBits, "gap", chrom, chromSize, conn); bitNot(maskBits, chromSize); /* Set up bitmap for Ensemble or mRNA. */ fbOrTableBits(database, geneBits, "ensGene", chrom, chromSize, conn); fbOrTableBits(database, geneBits, "mrna", chrom, chromSize, conn); /* Calculate various stats on windows. */ addToStats(stats, aliBits, matchBits, geneBits, maskBits, r, f, pWinList); /* Cleanup */ bitFree(&geneBits); bitFree(&maskBits); bitFree(&aliBits); bitFree(&matchBits); }
Bits *maskFromUpperCaseSeq(bioSeq *seq)
/* Allocate a mask of seq->size bits and set bit i wherever seq->dna[i]
 * is an upper-case character.  The mask is returned without being
 * freed. */
{
int size = seq->size, i;
char *poly = seq->dna;
Bits *b = bitAlloc(size);

for (i=0; i<size; ++i)
    {
    /* <ctype.h> functions are only defined for values representable as
     * unsigned char (or EOF); plain char may be negative. */
    if (isupper((unsigned char)poly[i]))
	bitSetOne(b, i);
    }
return b;
}
Bits *findUsedIds(char *netFileName)
/* Read every net in netFileName and return a bitmap of maxChainId bits
 * in which chainNetMarkUsed has flagged the chain ids the nets use. */
{
Bits *usedIds = bitAlloc(maxChainId);
struct lineFile *netFile = lineFileOpen(netFileName, TRUE);
struct chainNet *net;

for (net = chainNetRead(netFile); net != NULL; net = chainNetRead(netFile))
    {
    chainNetMarkUsed(net, usedIds, maxChainId);
    chainNetFree(&net);
    }
lineFileClose(&netFile);
return usedIds;
}
boolean bigWigValsOnChromFetchData(struct bigWigValsOnChrom *chromVals, char *chrom, struct bbiFile *bigWig) /* Fetch data for chromosome from bigWig. Returns FALSE if not data on that chrom. */ { /* Fetch chromosome and size into self. */ freeMem(chromVals->chrom); chromVals->chrom = cloneString(chrom); long chromSize = chromVals->chromSize = bbiChromSize(bigWig, chrom); if (chromSize <= 0) return FALSE; /* Make sure buffers are big enough. */ if (chromSize > chromVals->bufSize) { freeMem(chromVals->valBuf); freeMem(chromVals->covBuf); chromVals->valBuf = needHugeMem((sizeof(double))*chromSize); chromVals->covBuf = bitAlloc(chromSize); chromVals->bufSize = chromSize; } /* Zero out buffers */ bitClear(chromVals->covBuf, chromSize); double *valBuf = chromVals->valBuf; int i; for (i=0; i<chromSize; ++i) valBuf[i] = 0.0; fetchIntoBuf(bigWig, chrom, 0, chromSize, chromVals); #ifdef OLD /* Fetch intervals for this chromosome and fold into buffers. */ struct lm *lm = lmInit(0); struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bigWig, chrom, 0, chromSize, lm); for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; int end = iv->end; for (i=iv->start; i<end; ++i) valBuf[i] = val; bitSetRange(chromVals->covBuf, iv->start, iv->end - iv->start); } lmCleanup(&lm); #endif /* OLD */ return TRUE; }
Bits *bitsForIntersectingTable(struct sqlConnection *conn, struct region *region,
	int chromSize, boolean isBpWise)
/* Get a bitmap that corresponds to the table we are intersecting with.
 * The table name and the invert flag are read from CGI/cart variables.
 * The returned bitmap is allocated here. */
{
boolean wantInverse = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
char *secondTable = cartString(cart, hgtaIntersectTable);
struct hTableInfo *secondHti = getHti(database, secondTable, conn);
struct lm *pool = lmInit(64*1024);
Bits *result = bitAlloc(chromSize+8);
struct bed *secondBeds = getFilteredBeds(conn, secondTable, region, pool, NULL);
boolean blocky = secondHti->hasBlocks;

if (!isBpWise)
    expandZeroSize(secondBeds, blocky, chromSize);
bedOrBits(result, chromSize, secondBeds, blocky, 0);
if (wantInverse)
    bitNot(result, chromSize);

/* The beds are no longer needed once they are in the bitmap. */
lmCleanup(&pool);
return result;
}
struct hash *setupChroms(char *fileName)
/* Read a file of name/size pairs and turn it into a hash full of
 * chromosomes keyed by name.  Each chromosome gets a bitmap of its own
 * size. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *hash = newHash(8);
struct chrom *chrom;
char *row[2];

while (lineFileRow(lf, row))
    {
    AllocVar(chrom);
    /* The hash keeps the name string; chrom->name is pointed at it. */
    hashAddSaveName(hash, row[0], chrom, &chrom->name);
    chrom->size = lineFileNeedNum(lf, row, 1);
    chrom->bits = bitAlloc(chrom->size);
    }
lineFileClose(&lf);
return hash;
}
void splitByGap(char *inName, int pieceSize, char *outRoot, long long estSize) /* Split up file into pieces at most pieceSize bases long, at gap boundaries * if possible. */ { off_t pieces = (estSize + pieceSize-1)/pieceSize; int digits = digitsBaseTen(pieces); int minGapSize = optionInt("minGapSize", 1000); boolean noGapDrops = optionExists("noGapDrops"); int maxN = optionInt("maxN", pieceSize-1); boolean oneFile = optionExists("oneFile"); char fileName[512]; char dirOnly[256], noPath[128]; int pos, pieceIx = 0, writeCount = 0; struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = NULL; Bits *bits = NULL; int seqCount = 0; char *outFile = optionVal("out", NULL); char *liftFile = optionVal("lift", NULL); FILE *lift = NULL; ZeroVar(&seq); if (minGapSize < 1) errAbort("ERROR: minGapSize must be > 0"); splitPath(outRoot, dirOnly, noPath, NULL); if (oneFile) { sprintf(fileName, "%s.fa", outRoot); f = mustOpen(fileName, "w"); } else fileName[0] = '\0'; if (liftFile) lift = mustOpen(liftFile, "w"); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { bits = bitAlloc(seq.size); setBitsN(seq.dna, seq.size, bits); ++seqCount; if (outFile != NULL) { if (seqCount > 1) errAbort("Can only handle in files with one sequence using out option"); bitsForOut(outFile, seq.size, bits); } pos = 0; while (pos < seq.size) { boolean gotGap = FALSE; int gapStart = 0; int gapSize = 0; int endSize = seq.size - pos; int thisSize = min(endSize, pieceSize); int startGapLen = 0; if (seq.dna[pos] == 'n' || seq.dna[pos] == 'N') { startGapLen = bitFindClear(bits, pos, endSize) - pos; verbose(3,"#\tstarting gap at %d for length: %d\n", pos, startGapLen ); } /* if a block is all gap for longer than minGapSize, then * keep it all together in one large piece */ if (startGapLen > minGapSize) { if (noGapDrops) { writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, startGapLen, &seq, lift, &writeCount, fileName); } else verbose(3,"#\tbeginning gap of %d 
size skipped\n", startGapLen); thisSize = startGapLen; } else if (thisSize > 0 && bitCountRange(bits, pos, thisSize) <= maxN) { if (endSize>pieceSize) /* otherwise chops tiny piece at very end */ { gotGap = findLastGap(&(seq.dna[pos]), thisSize, endSize, minGapSize, &gapStart, &gapSize); if (gotGap) thisSize = gapStart; } writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, thisSize, &seq, lift, &writeCount, fileName); } pos += thisSize; if (gotGap) { /* last block is all gap, write it all out */ /*if ((pos + gapSize) >= seq.size)*/ if (noGapDrops) { writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, gapSize, &seq ,lift, &writeCount, fileName); verbose(3, "#\tadding gapSize %d to pos %d -> %d and writing gap\n", gapSize, pos, pos+gapSize); } else verbose(3,"#\tadding gapSize %d to pos %d -> %d\n", gapSize, pos, pos+gapSize); pos += gapSize; } } bitFree(&bits); } carefulClose(&f); carefulClose(&lift); lineFileClose(&lf); printf("%d pieces of %d written\n", writeCount, pieceIx); }
void chromFeatureBits(struct sqlConnection *conn,char *database, char *chrom, int tableCount, char *tables[], FILE *bedFile, FILE *faFile, FILE *binFile, struct bed *bedRegionList, FILE *bedOutFile, int chromSize, int *retChromBits, int *retFirstTableBits, int *retSecondTableBits) /* featureBits - Correlate tables via bitmap projections and booleans * on one chromosome. */ { int i; Bits *acc = NULL; Bits *bits = NULL; char *table; acc = bitAlloc(chromSize); bits = bitAlloc(chromSize); for (i=0; i<tableCount; ++i) { boolean not = FALSE; table = tables[i]; if (table[0] == '!') { not = TRUE; ++table; } if (i == 0) { orTable(database, acc, table, chrom, chromSize, conn); if (not) bitNot(acc, chromSize); if (retFirstTableBits != NULL) *retFirstTableBits = bitCountRange(acc, 0, chromSize); } else { bitClear(bits, chromSize); orTable(database, bits, table, chrom, chromSize, conn); if (not) bitNot(bits, chromSize); if (i == 1 && retSecondTableBits != NULL) *retSecondTableBits = bitCountRange(bits, 0, chromSize); /* feature/bug - the above does not respect minSize */ if (orLogic) bitOr(acc, bits, chromSize); else bitAnd(acc, bits, chromSize); } } if (notResults) bitNot(acc, chromSize); *retChromBits = bitCountRange(acc, 0, chromSize); if (bedFile != NULL || faFile != NULL) { minSize = optionInt("minSize", minSize); bitsToBed(database, acc, chrom, chromSize, bedFile, faFile, minSize); } if (binFile != NULL) { binSize = optionInt("binSize", binSize); binOverlap = optionInt("binOverlap", binOverlap); bitsToBins(acc, chrom, chromSize, binFile, binSize, binOverlap); } if (bedOutFile != NULL) bitsToRegions(acc, chrom, chromSize, bedRegionList, bedOutFile); bitFree(&acc); bitFree(&bits); }
static struct bed *intersectOnRegion( struct sqlConnection *conn, /* Open connection to database. */ struct region *region, /* Region to work inside */ char *table1, /* Table input list is from. */ struct bed *bedList1, /* List before intersection, should be * all within region. */ struct lm *lm, /* Local memory pool. */ int *retFieldCount) /* Field count. */ /* Intersect bed list, consulting CGI vars to figure out * with what table and how. Return intersected result, * which is independent from input. This potentially will * chew up bedList1. */ { /* Grab parameters for intersection from cart. */ double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0); double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100); boolean invTable = cartCgiUsualBoolean(cart, hgtaInvertTable, FALSE); char *op = intersectOp(); /* --- TODO MIKE - replace bedList2, bits2 with baseMask stuff. */ /* Load up intersecting bedList2 (to intersect with) */ int chromSize = hChromSize(database, region->chrom); boolean isBpWise = (sameString("and", op) || sameString("or", op)); Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise); /* Set up some other local vars. */ struct hTableInfo *hti1 = getHti(database, table1, conn); struct bed *intersectedBedList = NULL; /* Produce intersectedBedList. */ if (isBpWise) { /* --- TODO MIKE - replace, bits1 with baseMask stuff. 
*/ /* Base-pair-wise operation: get bitmap for primary table too */ Bits *bits1 = bitAlloc(chromSize+8); boolean hasBlocks = hti1->hasBlocks; if (retFieldCount != NULL && (*retFieldCount < 12)) hasBlocks = FALSE; bedOrBits(bits1, chromSize, bedList1, hasBlocks, 0); /* invert inputs if necessary */ if (invTable) bitNot(bits1, chromSize); /* do the intersection/union */ if (sameString("and", op)) bitAnd(bits1, bits2, chromSize); else bitOr(bits1, bits2, chromSize); /* clip to region if necessary: */ if (region->start > 0) bitClearRange(bits1, 0, region->start); if (region->end < chromSize) bitClearRange(bits1, region->end, (chromSize - region->end)); /* translate back to bed */ intersectedBedList = bitsToBed4List(bits1, chromSize, region->chrom, 1, region->start, region->end, lm); if (retFieldCount != NULL) *retFieldCount = 4; bitFree(&bits1); } else intersectedBedList = filterBedByOverlap(bedList1, hti1->hasBlocks, op, moreThresh, lessThresh, bits2, chromSize); bitFree(&bits2); return intersectedBedList; }
struct bed *getRegionAsMergedBed( char *db, char *table, /* Database and table. */ struct region *region, /* Region to get data for. */ char *filter, /* Filter to add to SQL where clause if any. */ struct hash *idHash, /* Restrict to id's in this hash if non-NULL. */ struct lm *lm, /* Where to allocate memory. */ int *retFieldCount) /* Number of fields. */ /* Return a bed list of all items in the given range in subtrack-merged table. * Cleanup result via lmCleanup(&lm) rather than bedFreeList. */ { if (! anySubtrackMerge(db, table)) return getRegionAsBed(db, table, region, filter, idHash, lm, retFieldCount); else { struct hTableInfo *hti = getHtiOnDb(database, table); int chromSize = hChromSize(database, region->chrom); Bits *bits1 = NULL; Bits *bits2 = NULL; struct bed *bedMerged = NULL; struct trackDb *subtrack = NULL; char *primaryType = findTypeForTable(database,curTrack,table, ctLookupName); char *op = cartString(cart, hgtaSubtrackMergeOp); boolean isBpWise = (sameString(op, "and") || sameString(op, "or")); double moreThresh = cartDouble(cart, hgtaSubtrackMergeMoreThreshold); double lessThresh = cartDouble(cart, hgtaSubtrackMergeLessThreshold); boolean firstTime = TRUE; if (sameString(op, "cat")) { struct bed *bedList = getRegionAsBed(db, table, region, filter, idHash, lm, retFieldCount); struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(curTrack->subtracks); struct slRef *tdbRef; for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next) { subtrack = tdbRef->val; if (! 
sameString(curTable, subtrack->table) && isSubtrackMerged(subtrack->table) && sameString(subtrack->type, primaryType)) { struct bed *bedList2 = getRegionAsBed(db, subtrack->table, region, NULL, idHash, lm, retFieldCount); bedList = slCat(bedList, bedList2); } } slFreeList(&tdbRefList); return bedList; } bits1 = bitAlloc(chromSize+8); bits2 = bitAlloc(chromSize+8); /* If doing a base-pair-wise operation, then start with the primary * subtrack's ranges in bits1, and AND/OR all the selected subtracks' * ranges into bits1. If doing a non-bp-wise intersection, then * start with all bits clear in bits1, and then OR selected subtracks' * ranges into bits1. */ if (isBpWise) { struct lm *lm2 = lmInit(64*1024); struct bed *bedList1 = getRegionAsBed(db, table, region, filter, idHash, lm2, retFieldCount); bedOrBits(bits1, chromSize, bedList1, hti->hasBlocks, 0); lmCleanup(&lm2); } struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(curTrack->subtracks); struct slRef *tdbRef; for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next) { subtrack = tdbRef->val; if (! 
sameString(curTable, subtrack->table) && isSubtrackMerged(subtrack->table) && sameString(subtrack->type, primaryType)) { struct hTableInfo *hti2 = getHtiOnDb(database, subtrack->table); struct lm *lm2 = lmInit(64*1024); struct bed *bedList2 = getRegionAsBed(db, subtrack->table, region, NULL, idHash, lm2, NULL); if (firstTime) firstTime = FALSE; else bitClear(bits2, chromSize); bedOrBits(bits2, chromSize, bedList2, hti2->hasBlocks, 0); if (sameString(op, "and")) bitAnd(bits1, bits2, chromSize); else bitOr(bits1, bits2, chromSize); lmCleanup(&lm2); } } slFreeList(&tdbRefList); if (isBpWise) { bedMerged = bitsToBed4List(bits1, chromSize, region->chrom, 1, region->start, region->end, lm); if (retFieldCount != NULL) *retFieldCount = 4; } else { struct bed *bedList1 = getRegionAsBed(db, table, region, filter, idHash, lm, retFieldCount); bedMerged = filterBedByOverlap(bedList1, hti->hasBlocks, op, moreThresh, lessThresh, bits1, chromSize); } bitFree(&bits1); bitFree(&bits2); return bedMerged; } }
void splitByCount(char *inName, int pieceSize, char *outRoot, off_t estSize, int extra) /* Split up file into pieces pieceSize long. */ { off_t pieces = (estSize + pieceSize-1)/pieceSize; int digits = digitsBaseTen(pieces); int maxN = optionInt("maxN", pieceSize-1); boolean oneFile = optionExists("oneFile"); char fileName[PATH_LEN]; char dirOnly[PATH_LEN], noPath[128]; int pos, pieceIx = 0, writeCount = 0; struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = NULL; Bits *bits = NULL; int seqCount = 0; char *outFile = optionVal("out", NULL); char *liftFile = optionVal("lift", NULL); FILE *lift = NULL; ZeroVar(&seq); splitPath(outRoot, dirOnly, noPath, NULL); if (oneFile) { sprintf(fileName, "%s.fa", outRoot); f = mustOpen(fileName, "w"); } if (liftFile) lift = mustOpen(liftFile, "w"); /* Count number of N's from s[0] to s[size-1]. * Treat any parts past end of string as N's. */ while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { bits = bitAlloc(seq.size); setBitsN(seq.dna, seq.size, bits); ++seqCount; if (outFile != NULL) { if (seqCount > 1) errAbort("Can only handle in files with one sequence using out option"); bitsForOut(outFile, seq.size, bits); } for (pos = 0; pos < seq.size; pos += pieceSize) { char numOut[128]; int thisSize = seq.size - pos; if (thisSize > (pieceSize + extra)) thisSize = pieceSize + extra; if ((thisSize <= extra) && (pos > 0)) break; /* nobody wants duplicate smaller than extra overhang */ if (bitCountRange(bits, pos, thisSize) <= maxN) { if (!oneFile) { mkOutPath(fileName, outRoot, digits, pieceIx); f = mustOpen(fileName, "w"); } sprintf(numOut, "%s%0*d", noPath, digits, pieceIx); faWriteNext(f, numOut, seq.dna + pos, thisSize); if (lift) fprintf(lift, "%d\t%s\t%d\t%s\t%d\n", pos, numOut, thisSize, seq.name, seq.size); ++writeCount; if (!oneFile) carefulClose(&f); } pieceIx++; } bitFree(&bits); } carefulClose(&f); carefulClose(&lift); lineFileClose(&lf); printf("%d pieces of %d written\n", writeCount, 
pieceIx); }