Пример #1
0
void axtAndBed(char *inAxt, char *inBed, char *outAxt)
/* axtAndBed - Intersect an axt with a bed file and output axt.
 * Each alignment read from inAxt is looked up by target name in the hash
 * built from inBed.  The elements found inside the alignment's target range
 * are merged into non-overlapping runs, and every run is handed to
 * axtSubsetOnT together with the file opened on outAxt. */
{
struct hash *tHash = readBed(inBed); /* keyed by target name; values are used as struct chromInfo */
struct lineFile *lf = lineFileOpen(inAxt, TRUE);
FILE *f = mustOpen(outAxt, "w");
struct axtScoreScheme *ss = axtScoreSchemeDefault();
struct axt *axt;

while ((axt = axtRead(lf)) != NULL)
    {
    struct chromInfo *ci = hashFindVal(tHash, axt->tName);
    struct binElement *hits = NULL, *hit;

    /* Only targets present in the bed hash can have anything to intersect. */
    if (ci != NULL)
        hits = binKeeperFind(ci->bk, axt->tStart, axt->tEnd);

    if (hits != NULL)
        {
        /* Merge overlapping elements: mark every covered target base
         * (relative to the alignment's target start) in a byte array, then
         * emit one subset per run of marked bytes. */
        int base = axt->tStart;
        int span = axt->tEnd - base;
        int pos, runStart = 0;
        int inRun = 0;
        char *covered = NULL;

        /* One slot more than span so the last slot is never marked and a
         * run reaching the end of the alignment still gets closed below.
         * Assumes AllocArray hands back zero-filled memory; nothing here
         * clears it. */
        AllocArray(covered, span+1);

        for (hit = hits; hit != NULL; hit = hit->next)
            {
            int from = hit->start - base;
            int to = hit->end - base;
            int len;
            /* Clip the element to the alignment's target range. */
            if (from < 0)
                from = 0;
            if (to > span)
                to = span;
            len = to - from;
            if (len > 0)
                memset(covered + from, 1, len);
            }

        for (pos = 0; pos <= span; ++pos)
            {
            if (covered[pos])
                {
                if (!inRun)
                    {
                    /* 0 -> 1 transition: a run begins here. */
                    runStart = pos;
                    inRun = 1;
                    }
                }
            else if (inRun)
                {
                /* 1 -> 0 transition: the run is [runStart, pos). */
                axtSubsetOnT(axt, runStart + base, pos + base, ss, f);
                inRun = 0;
                }
            }

        freez(&covered);
        slFreeList(&hits);
        }
    axtFree(&axt);
    }
}
Пример #2
0
void bedIntersect(char *aFile, char *bFile, char *outFile)
/* bedIntersect - Intersect two bed files. */
{
struct lineFile *lf = lineFileOpen(aFile, TRUE);
struct hash *bHash = readBed(bFile);
FILE *f = mustOpen(outFile, "w");
char *row[40];
int wordCount;

while ((wordCount = (strictTab ? lineFileChopTab(lf, row) : lineFileChop(lf, row))) != 0)
    {
    char *chrom = row[0];
    int start = lineFileNeedNum(lf, row, 1);
    int end = lineFileNeedNum(lf, row, 2);
    if (start > end)
        errAbort("start after end line %d of %s", lf->lineIx, lf->fileName);
    if (start == end && !allowStartEqualEnd)
	lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)");
    struct binKeeper *bk = hashFindVal(bHash, chrom);
    if (bk != NULL)
	{
	struct binElement *hitList = NULL, *hit;
	if (allowStartEqualEnd && start == end)
	    hitList = binKeeperFind(bk, start-1, end+1);
	else
	    hitList = binKeeperFind(bk, start, end);
	if (aHitAny)
	    {
	    for (hit = hitList; hit != NULL; hit = hit->next)
		{
		float cov = getCov(start, end, hit->val);
		if (cov >= minCoverage)
		    {
		    outputBed(f, row, wordCount, start, end, hit->val);
		    break;
		    }
		else
		    {
		    struct bed5 *b = hit->val;
		    verbose(1, "filter out %s %d %d %d %d overlap %d %d %d %.3f\n",
			    chrom, start, end, b->start, b->end,
			    positiveRangeIntersection(start, end, b->start, b->end),
			    end-start, b->end-b->start, cov);
		    }
		}
	    }
	else
	    {
	    for (hit = hitList; hit != NULL; hit = hit->next)
	        {
		if (getCov(start, end, hit->val) >= minCoverage)
		    outputBed(f, row, wordCount, start, end, hit->val);
		}
	    }
	slFreeList(&hitList);
	}
    }
}
void addBed(char *file, struct hash *fileHash)
/* addBed - Read the bed file and store the result of readBed in fileHash,
 * keyed by the name part that splitPath extracts from the path.
 * Aborts if file does not end in ".bed". */
{
char baseName[128];   /* receives the name component from splitPath */

if (!endsWith(file, ".bed"))
    {
    errAbort("filenames in bed list must end in '.bed'");
    }

/* Only the name part is wanted; directory and extension outputs are skipped. */
splitPath(file, NULL, baseName, NULL);
hashAdd(fileHash, baseName, readBed(file));
}