void axtAndBed(char *inAxt, char *inBed, char *outAxt) /* axtAndBed - Intersect an axt with a bed file and output axt.. */ { struct hash *tHash = readBed(inBed); /* target keyed, binKeeper value */ struct lineFile *lf = lineFileOpen(inAxt, TRUE); struct axt *axt; struct binElement *list = NULL, *el; FILE *f = mustOpen(outAxt, "w"); struct axtScoreScheme *ss = axtScoreSchemeDefault(); while ((axt = axtRead(lf)) != NULL) { struct chromInfo *ci = hashFindVal(tHash, axt->tName); if (ci != NULL) { list = binKeeperFind(ci->bk, axt->tStart, axt->tEnd); if (list != NULL) { /* Flatten out any overlapping elements by projecting them * onto a 0/1 valued character array and then looking for * runs of 1 in this array. */ int tStart = axt->tStart; int tEnd = axt->tEnd; int tSize = tEnd - tStart; int i, s = 0; char c, lastC = 0; char *merger = NULL; AllocArray(merger, tSize+1); for (el = list; el != NULL; el = el->next) { int s = el->start - tStart; int e = el->end - tStart; int sz; if (s < 0) s = 0; if (e > tSize) e = tSize; sz = e - s; if (sz > 0) memset(merger + s, 1, sz); } for (i=0; i<=tSize; ++i) { c = merger[i]; if (c && !lastC) { s = i; lastC = c; } else if (!c && lastC) { axtSubsetOnT(axt, s+tStart, i+tStart, ss, f); lastC = c; } } freez(&merger); slFreeList(&list); } } axtFree(&axt); } }
void bedIntersect(char *aFile, char *bFile, char *outFile) /* bedIntersect - Intersect two bed files. */ { struct lineFile *lf = lineFileOpen(aFile, TRUE); struct hash *bHash = readBed(bFile); FILE *f = mustOpen(outFile, "w"); char *row[40]; int wordCount; while ((wordCount = (strictTab ? lineFileChopTab(lf, row) : lineFileChop(lf, row))) != 0) { char *chrom = row[0]; int start = lineFileNeedNum(lf, row, 1); int end = lineFileNeedNum(lf, row, 2); if (start > end) errAbort("start after end line %d of %s", lf->lineIx, lf->fileName); if (start == end && !allowStartEqualEnd) lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)"); struct binKeeper *bk = hashFindVal(bHash, chrom); if (bk != NULL) { struct binElement *hitList = NULL, *hit; if (allowStartEqualEnd && start == end) hitList = binKeeperFind(bk, start-1, end+1); else hitList = binKeeperFind(bk, start, end); if (aHitAny) { for (hit = hitList; hit != NULL; hit = hit->next) { float cov = getCov(start, end, hit->val); if (cov >= minCoverage) { outputBed(f, row, wordCount, start, end, hit->val); break; } else { struct bed5 *b = hit->val; verbose(1, "filter out %s %d %d %d %d overlap %d %d %d %.3f\n", chrom, start, end, b->start, b->end, positiveRangeIntersection(start, end, b->start, b->end), end-start, b->end-b->start, cov); } } } else { for (hit = hitList; hit != NULL; hit = hit->next) { if (getCov(start, end, hit->val) >= minCoverage) outputBed(f, row, wordCount, start, end, hit->val); } } slFreeList(&hitList); } } }
void addBed(char *file, struct hash *fileHash)
/* addBed - Load one bed file with readBed() and store the result in
 * fileHash under the name part that splitPath() extracts from the path.
 * Aborts if the file name does not end in ".bed". */
{
if (!endsWith(file, ".bed"))
    {
    errAbort("filenames in bed list must end in '.bed'");
    }

/* Only the name component is wanted; directory and extension are
 * not requested from splitPath. */
char baseName[128];
splitPath(file, NULL, baseName, NULL);

hashAdd(fileHash, baseName, readBed(file));
}