Пример #1
0
void indexedChainSubsetOnT(struct indexedChain *ixc, int subStart, int subEnd, 
    struct chain **retSubChain,  struct chain **retChainToFree)
/* Pull the subStart-subEnd portion out of an indexed chain.  The chain's
 * block tree is queried for ranges overlapping the interval; if there are
 * none both return pointers are set to NULL.  Otherwise the value stored
 * on the first overlapping range is handed to chainFastSubsetOnT, which
 * fills in the two return pointers. */
{
struct range *firstHit = rangeTreeAllOverlapping(ixc->blockTree, subStart, subEnd);
if (firstHit != NULL)
    {
    chainFastSubsetOnT(ixc->chain, firstHit->val, subStart, subEnd,
                       retSubChain, retChainToFree);
    return;
    }

/* Nothing in the index touches the requested interval. */
*retChainToFree = NULL;
*retSubChain = NULL;
}
Пример #2
0
int chainBlockCoverage(struct indexedChain *ixc, int start, int end,
		       int* blockStarts, int *blockSizes, int blockCount)
/* Count the blocks (blockStarts[i] .. blockStarts[i]+blockSizes[i]) whose
 * begin and end coordinates are both found inside ranges of the chain's
 * block tree that overlap start-end.  Returns that count.
 *
 * The cursor into the range list only ever moves forward, so the blocks
 * are presumably expected in ascending order. */
{
int coveredCount = 0;
int blockIx;

/* Ranges of the chain that overlap the region we care about. */
struct range *cursor = rangeTreeAllOverlapping(ixc->blockTree, start, end);

for (blockIx = 0; blockIx < blockCount; ++blockIx)
    {
    int bStart = blockStarts[blockIx];
    int bEnd = bStart + blockSizes[blockIx];
    boolean gotStart = FALSE;
    struct range *hit;

    /* Drop ranges that finish at or before this block begins. */
    while (cursor != NULL && cursor->end <= bStart)
        cursor = cursor->next;

    /* Walk the remaining ranges looking first for one holding the block
     * start, then for one holding the block end.
     *    CCCCCC  CCCCC       CCCCCC    CCC  CCCC
     *     BBB   BBBB    BBB BBBBBBBB        BBBB
     *     yes    no     no     no      no   yes   */
    hit = cursor;
    while (hit != NULL)
        {
        if (!gotStart)
            gotStart = (hit->start <= bStart && bStart <= hit->end);
        if (gotStart && hit->start <= bEnd && bEnd <= hit->end)
            {
            ++coveredCount;
            break;
            }
        /* Ranges from here on begin past the block end; stop looking. */
        if (hit->start > bEnd)
            break;
        hit = hit->next;
        }
    }
return coveredCount;
}
Пример #3
0
struct range *rangeTreeMaxOverlapping(struct rbTree *tree, int start, int end)
/* Find the range in tree sharing the most bases with start-end and return
 * it, or NULL if nothing has a positive overlap.  Not thread safe.  Reuses
 * (and trashes) the list produced by rangeTreeAllOverlapping. */
{
struct range *winner = NULL;
int winnerSize = 0;
struct range *cur = rangeTreeAllOverlapping(tree, start, end);

while (cur != NULL)
    {
    int shared = rangeIntersection(cur->start, cur->end, start, end);
    if (shared > winnerSize)
        {
        winner = cur;
        winnerSize = shared;
        }
    cur = cur->next;
    }

if (winner == NULL)
    return NULL;

/* Detach from the overlap list so list functions see a single item. */
winner->next = NULL;
return winner;
}
struct bed *breakUpBedAtCdsBreaks(struct cdsEvidence *cds, struct bed *bed)
/* Create a new broken-up bed that excludes the parts of the gene lying
 * between CDS breaks.  Also jiggles cds->end coordinate to cope with the
 * sequence we remove.  Deals with transcript to genome coordinate mapping
 * including negative strand.  Be afraid, be very afraid!
 *
 * Side effect: cds->end is reduced by the size of every gap between
 * consecutive CDS pieces.
 * Returns a newly allocated bed with chrom, chromStart, chromEnd, name,
 * score, strand[0], blockCount, blockSizes and chromStarts filled in from
 * the (possibly split) exon list; no other fields are assigned here. */
{
/* Create range tree covering all breaks.  The coordinates here
 * are transcript coordinates.  While we're at it shrink outer CDS
 * since we are actually shrinking transcript. */
struct rbTree *gapTree = rangeTreeNew();
int bedSize = bed->chromEnd - bed->chromStart;
struct lm *lm = gapTree->lm;	/* Convenient place to allocate memory. */
int i, lastCds = cds->cdsCount-1;
for (i=0; i<lastCds; ++i)
    {
    /* The gap runs from the end of CDS piece i to the start of piece i+1. */
    int gapStart = cds->cdsStarts[i] + cds->cdsSizes[i];
    int gapEnd = cds->cdsStarts[i+1];
    int gapSize = gapEnd - gapStart;
    cds->end -= gapSize;
    rangeTreeAdd(gapTree, gapStart, gapEnd);
    }

/* Get list of exons in bed, flipped to reverse strand if need be. */
struct range *exon, *exonList = bedToExonList(bed, lm);
if (bed->strand[0] == '-')
    flipExonList(&exonList, bedSize);

/* Go through exon list, mapping each exon to transcript
 * coordinates. Check if exon needs breaking up, and if
 * so do so, as we copy it to new list. */
/* txStartPos/txEndPos track the current exon's span in transcript
 * coordinates (running sum of exon sizes).  newList is built in reverse
 * with slAddHead and reversed after the loop. */
struct range *newList = NULL, *nextExon, *newExon;
int txStartPos = 0, txEndPos;
for (exon = exonList; exon != NULL; exon = nextExon)
    {
    txEndPos = txStartPos + exon->end - exon->start;
    /* Save next pointer now: slAddHead below relinks exon onto newList. */
    nextExon = exon->next;
    struct range *gapList = rangeTreeAllOverlapping(gapTree, txStartPos, txEndPos);
    if (gapList != NULL)
        {
	verbose(3, "Splitting exon because of CDS gap\n");

	/* Make up exons from current position up to next gap.  This is a little
	 * complicated by possibly the gap starting before the exon. */
	/* exonStart walks the exon-list coordinate, txStart the matching
	 * transcript coordinate; both advance together. */
	int exonStart = exon->start;
	int txStart = txStartPos;
	struct range *gap;
	for (gap = gapList; gap != NULL; gap = gap->next)
	    {
	    int txEnd = gap->start;
	    /* Only the part of the gap inside the remaining exon counts. */
	    int gapSize = rangeIntersection(gap->start, gap->end, txStart, txEndPos);
	    int exonSize = txEnd - txStart;
	    if (exonSize > 0)
		{
		lmAllocVar(lm, newExon);
		newExon->start = exonStart;
		newExon->end = exonStart + exonSize;
		slAddHead(&newList, newExon);
		}
	    else /* This case happens if gap starts before exon */
	        {
		exonSize = 0;
		}

	    /* Update current position in both transcript and genome space. */
	    exonStart += exonSize + gapSize;
	    txStart += exonSize + gapSize;
	    }

	/* Make up final exon from last gap to end, at least if we don't end in a gap. */
	if (exonStart < exon->end)
	    {
	    lmAllocVar(lm, newExon);
	    newExon->start = exonStart;
	    newExon->end = exon->end;
	    slAddHead(&newList, newExon);
	    }
	}
    else
        {
	/* Easy case where we don't intersect any gaps. */
	slAddHead(&newList, exon);
	}
    txStartPos= txEndPos;
    }
slReverse(&newList);

/* Flip exons back to forward strand if need be */
if (bed->strand[0] == '-')
    flipExonList(&newList, bedSize);

/* Convert exons to bed12 */
/* NOTE(review): newList is dereferenced below without a NULL check; if every
 * exon were swallowed by gaps (or the bed had no exons) this would crash.
 * Confirm callers can never produce that. */
struct bed *newBed;
AllocVar(newBed);
newBed->chrom = cloneString(bed->chrom);
newBed->chromStart = newList->start + bed->chromStart;
newBed->chromEnd = newList->end + bed->chromStart;
newBed->name  = cloneString(bed->name);
newBed->score = bed->score;
newBed->strand[0] = bed->strand[0];
newBed->blockCount = slCount(newList);
AllocArray(newBed->blockSizes,  newBed->blockCount);
AllocArray(newBed->chromStarts,  newBed->blockCount);
for (exon = newList, i=0; exon != NULL; exon = exon->next, i++)
    {
    /* NOTE(review): chromStarts[i] is stored relative to the original
     * bed->chromStart, while newBed->chromStart is newList->start +
     * bed->chromStart; confirm this is intended when the first exon
     * no longer begins at 0. */
    newBed->chromStarts[i] = exon->start;
    newBed->blockSizes[i] = exon->end - exon->start;
    /* Overwritten each pass, so it ends up as the last exon's end. */
    newBed->chromEnd = exon->end + bed->chromStart;
    }

/* Clean up and go home. */
/* The exon ranges were allocated from gapTree->lm; presumably freeing the
 * tree releases them as well. */
rbTreeFree(&gapTree);
return newBed;
}
Пример #5
0
void allWriteReadsToDir(char *regionFile, char *dir) {
	FILE *fp, *rd;
	char buf[500], readName[500], fileName[500], chr[50], fub[500];
	char str[2][500];
	char *readStr, *ch;
	int i, b, e, j, k;
	struct slName *ali;
	struct hashEl *el;
	struct rbTree *tr;
	struct range *rg;
	struct hash *localHash = NULL;

	fp = mustOpen(regionFile, "r");
	j = 0;
	while (fgets(buf, 500, fp)) {
		if (sscanf(buf, "%[^\t]\t%[^\t]\t%*s", str[0], str[1]) != 2)
			errAbort("error: %s", buf);
		++j;
		sprintf(fileName, "%s/R%d/reads.fq", dir, j);
		rd = mustOpen(fileName, "w");
		localHash = hashNew(8);
		for (i = 0; i < 2; i++) {
			if (sscanf(str[i], "%[^:]:%d-%d", chr, &b, &e) != 3)
				errAbort("error: %s", str[i]);
			el = hashLookup(aliHash, chr);
			tr = (struct rbTree *)(el->val);
			for (rg = rangeTreeAllOverlapping(tr, b, e); rg; rg = rg->next) {
				for (ali = (struct slName *)(rg->val); ali; ali = ali->next) {
					if (hashLookup(localHash, ali->name))
						continue;
					hashStoreName(localHash, ali->name);
					readStr = (char *)hashFindVal(readsHash, ali->name);
					if(readStr == NULL)
						continue;
					//assert(readStr);
					strcpy(fub, readStr);
					ch = strchr(fub, ' ');
					*ch = '\0';
					fprintf(rd, "@%s\n", ali->name);
					fprintf(rd, "%s\n", fub);
					++ch;
					fprintf(rd, "+%s\n", ali->name);
					fprintf(rd, "%s\n", ch);
         				strcpy(readName, ali->name);
					k = strlen(readName);
					/*
					if (readName[k-1] == '1')
						readName[k-1] = '2';
					else if (readName[k-1] == '2')
						readName[k-1] = '1';
					else
						errAbort("read identifier error: %s", readName);
						
					if (hashLookup(localHash, readName))
						continue;
					hashStoreName(localHash, readName);
					readStr = (char *)hashFindVal(readsHash, readName);

					assert(readStr);
					strcpy(fub, readStr);
					ch = strchr(fub, ' ');
					*ch = '\0';
					fprintf(rd, "@%s\n", readName);
					fprintf(rd, "%s\n", fub);
					++ch;
					fprintf(rd, "+%s\n", readName);
					fprintf(rd, "%s\n", ch); 
					*/
				}
			}
		}
		hashFree(&localHash);
		fclose(rd);
	}
	fclose(fp);
	hashFreeWithVals(&readsHash, freez);
	hashFreeWithVals(&aliHash, rbTreeFree);
}
Пример #6
0
void rangeTreeAddToCoverageDepth(struct rbTree *tree, int start, int end)
/* Add area from start to end to a tree that is being built up to store the
 * depth of coverage.  Recover coverage back out by looking at ptToInt(range->val)
 * on tree elements.
 *
 * The tree is kept as a set of non-overlapping ranges, each carrying its
 * depth in val.  Adding a new interval splits existing ranges at start and
 * end where needed, bumps the depth of the pieces inside start-end by one,
 * and inserts depth-1 ranges for any part of start-end not yet covered. */
{
struct range q;
q.start = start;
q.end = end;

struct range *r, *existing = rbTreeFind(tree, &q);
if (existing == NULL)
    {
    /* Nothing overlaps: the whole interval is new at depth 1. */
    lmAllocVar(tree->lm, r);
    r->start = start;
    r->end = end;
    r->val = intToPt(1);
    rbTreeAdd(tree, r);
    }
else if (existing->start <= start && existing->end >= end)
    {
    /* The existing one completely encompasses us. */

    /* Make a new section for the bit before start. */
    if (existing->start < start)
        {
        lmAllocVar(tree->lm, r);
        r->start = existing->start;
        r->end = start;
        r->val = existing->val;
        existing->start = start;
        rbTreeAdd(tree, r);
        }
    /* Make a new section for the bit after end. */
    if (existing->end > end)
        {
        lmAllocVar(tree->lm, r);
        r->start = end;
        r->end = existing->end;
        r->val = existing->val;
        existing->end = end;
        rbTreeAdd(tree, r);
        }
    /* Increment existing section in overlapping area. */
    existing->val = (char *)(existing->val) + 1;
    }
else
    {
    /* In general case fetch list of regions that overlap us.
       Remaining cases to handle are:
	     r >> e     rrrrrrrrrrrrrrrrrrrr
			     eeeeeeeeee

	     e < r           rrrrrrrrrrrrrrr
			eeeeeeeeeeee

	     r < e      rrrrrrrrrrrr
			     eeeeeeeeeeeee
     */
    struct range *existingList = rangeTreeAllOverlapping(tree, start, end);

#ifdef DEBUG
    /* Make sure that list is really sorted for debugging... */
    int lastStart = existingList->start;
    for (r = existingList; r != NULL; r = r->next)
        {
        if (r->start < lastStart)
            internalErr();
        lastStart = r->start;
        }
#endif /* DEBUG */

    /* s is the first position of start-end not yet accounted for. */
    int s = start, e = end;
    for (existing = existingList; existing != NULL; existing = existing->next)
        {
        if (s < existing->start)
            {
            /* Uncovered stretch of the new range before this existing one. */
            lmAllocVar(tree->lm, r);
            r->start = s;
            r->end = existing->start;
            r->val = intToPt(1);
            s = existing->start;
            rbTreeAdd(tree, r);
            }
        else if (s > existing->start)
            {
            /* Existing range begins before the new one: split off the head,
             * which keeps its old depth. */
            lmAllocVar(tree->lm, r);
            r->start = existing->start;
            r->end = s;
            r->val = existing->val;
            existing->start = s;
            rbTreeAdd(tree, r);
            }
        if (existing->end > e)
            {
            /* Existing range runs past the end of the new one: split off the
             * tail so only the part inside start-end gets deeper.  Without
             * this the whole existing range would be incremented. */
            lmAllocVar(tree->lm, r);
            r->start = e;
            r->end = existing->end;
            r->val = existing->val;
            existing->end = e;
            rbTreeAdd(tree, r);
            }
        existing->val = (char *)(existing->val) + 1;
        s = existing->end;
        }
    if (s < e)
        {
        /* Deal with end of new range that doesn't overlap with anything. */
        lmAllocVar(tree->lm, r);
        r->start = s;
        r->end = e;
        r->val = intToPt(1);
        rbTreeAdd(tree, r);
        }
    }
}