Пример #1
0
static int cdsSizeInBlock(struct bed *bed, int blockIx)
/* Return the overlap between block blockIx of bed and the thick
 * (thickStart-thickEnd) region, as reported by rangeIntersection. */
{
int chromStart = bed->chromStart;
int blockStart = chromStart + bed->chromStarts[blockIx];
int blockEnd = blockStart + bed->blockSizes[blockIx];
return rangeIntersection(blockStart, blockEnd, bed->thickStart, bed->thickEnd);
}

int bedFirstCdsSize(struct bed *bed)
/* Return the size of the coding part of the first coding exon.  "First" is
 * in strand order: blocks are scanned from the last one backwards when the
 * strand is '-', and from the first one forwards otherwise.  Returns 0 when
 * no block overlaps the thick region. */
{
int blockIx;
int codingSize;
if (bed->strand[0] == '-')
    {
    blockIx = bed->blockCount-1;
    while (blockIx >= 0)
        {
        codingSize = cdsSizeInBlock(bed, blockIx);
        if (codingSize > 0)
            return codingSize;
        --blockIx;
        }
    return 0;
    }
blockIx = 0;
while (blockIx<bed->blockCount)
    {
    codingSize = cdsSizeInBlock(bed, blockIx);
    if (codingSize > 0)
        return codingSize;
    ++blockIx;
    }
return 0;
}
Пример #2
0
int orthoScore(struct orthoCdsArray *ortho, struct cdsEvidence *orf)
/* Score orf against one orthologous species.  The orthologous CDS entries
 * found at every third position of orf are examined and the one that overlaps
 * orf the most is kept.  The score is the number of positions in that overlap
 * (clipped to orf) whose base in ortho is neither '.' nor '-'. */
{
int bestSize = 0, bestStart = -1, bestEnd = -1;
int pos;

/* Pick the orthologous CDS with the largest overlap. */
for (pos = orf->start; pos < orf->end; pos += 3)
    {
    struct orthoCds *cds = &ortho->cdsArray[pos];
    int overlap = rangeIntersection(cds->start, cds->end, orf->start, orf->end);
    if (overlap <= bestSize)
        continue;
    bestSize = overlap;
    bestStart = cds->start;
    bestEnd = cds->end;
    }

/* Clip the winner to the orf and count real bases inside it. */
bestStart = max(bestStart, orf->start);
bestEnd = min(bestEnd, orf->end);
int realBases = 0;
for (pos = bestStart; pos < bestEnd; ++pos)
    {
    char base = ortho->cdsArray[pos].base;
    if (base == '.' || base == '-')
        continue;
    realBases += 1;
    }
return realBases;
}
Пример #3
0
static void addCluster(struct lm *lm, struct peakItem *itemList, int start, int end,
	struct peakCluster **pList)
/* Build a cluster spanning start-end from every item on itemList that
 * overlaps that range, and push it onto the head of *pList.  The cluster and
 * its item references are allocated out of lm.  The cluster score is the sum
 * of the member scores; maxSubScore is the largest member score (starting
 * from 0.0).  The chromosome is read from the first item of itemList, so
 * itemList must not be NULL. */
{
struct peakCluster *cluster;
lmAllocVar(lm, cluster);

struct slRef *members = NULL;
double total = 0.0;
double biggest = 0.0;
struct peakItem *item = itemList;
while (item != NULL)
    {
    if (rangeIntersection(start, end, item->chromStart, item->chromEnd) > 0)
        {
        struct slRef *ref;
        lmAllocVar(lm, ref);
        ref->val = item;
        slAddHead(&members, ref);
        total += item->score;
        if (item->score > biggest)
            biggest = item->score;
        }
    item = item->next;
    }
/* References were pushed on the head, so restore itemList order. */
slReverse(&members);

cluster->chrom = itemList->chrom;
cluster->chromStart = start;
cluster->chromEnd = end;
cluster->score = total;
cluster->maxSubScore = biggest;
cluster->itemRefList = members;
slAddHead(pList, cluster);
}
Пример #4
0
static int isContained (MrfRead *currRead)
/* Return 1 if any block of currRead overlaps a sub-interval (exon) of any
 * interval returned by intervalFind_getOverlappingIntervals for that block;
 * return 0 otherwise. */
{
  int blockIx;

  for (blockIx = 0; blockIx < arrayMax (currRead->blocks); blockIx++) {
    MrfBlock *block = arrp (currRead->blocks,blockIx,MrfBlock);
    Array transcripts = intervalFind_getOverlappingIntervals (block->targetName,block->targetStart,block->targetEnd);
    int txIx;

    for (txIx = 0; txIx < arrayMax (transcripts); txIx++) {
      Interval *transcript = arru (transcripts,txIx,Interval*);
      int exonIx;

      for (exonIx = 0; exonIx < arrayMax (transcript->subIntervals); exonIx++) {
        SubInterval *exon = arrp (transcript->subIntervals,exonIx,SubInterval);
        if (rangeIntersection (block->targetStart,block->targetEnd,exon->start,exon->end) > 0) {
          return 1;
        }
      }
    }
  }
  return 0;
}
Пример #5
0
void bbiIntervalCorrelatePair(struct bbiInterval *a, struct bbiInterval *b, struct correlate *c)
/* Feed the clamped values of a and b into c, weighted by the number of bases
 * the two intervals share.  The intervals are expected to overlap; this is
 * asserted. */
{
const int overlap = rangeIntersection(a->start, a->end,
                                      b->start, b->end);
assert(overlap > 0);
correlateNextMulti(c,
                   clamp(a->val),
                   clamp(b->val),
                   overlap);
}
Пример #6
0
boolean isSoftExon(struct altGraphX *agx, int edge)
/* Return TRUE if edge is an exon with a soft start or a soft end.  Unless the
 * file-level 'strict' flag is set, such an exon is still rejected (FALSE) when
 * any other edge that isHardExon() accepts overlaps it. */
{
unsigned char *vertexTypes = agx->vTypes;
int *vertexPos = agx->vPositions;
int *edgeStarts = agx->edgeStarts;
int *edgeEnds = agx->edgeEnds;
int other;

if (getSpliceEdgeType(agx, edge) != ggExon)
    return FALSE;
if (vertexTypes[edgeStarts[edge]] != ggSoftStart && vertexTypes[edgeEnds[edge]] != ggSoftEnd)
    return FALSE;
if (strict)
    return TRUE;

/* Non-strict mode: an overlapping hard exon disqualifies the soft one. */
for (other = 0; other < agx->edgeCount; other++)
    {
    if (other == edge)
        continue;
    if (!isHardExon(agx, other))
        continue;
    if (rangeIntersection(vertexPos[edgeStarts[edge]], vertexPos[edgeEnds[edge]],
                          vertexPos[edgeStarts[other]], vertexPos[edgeEnds[other]]) > 0)
        return FALSE;
    }
return TRUE;
}
Пример #7
0
void rt1dFind(char *tabFile, char *treeFile, char *chrom, bits32 start, bits32 end)
/* rt1dFind - find items in 1-D range tree.
 * Looks up the file blocks of tabFile that the index in treeFile reports as
 * overlapping chrom:start-end, re-reads those blocks line by line, and prints
 * to stdout each line whose first three columns (chrom, start, end) really do
 * overlap the query.  Aborts if a block cannot be read in full or a line does
 * not have exactly three words. */
{
struct lineFile *lf = lineFileOpen(tabFile, TRUE);
struct crTreeFile *crf = crTreeFileOpen(treeFile);
struct fileOffsetSize *block, *blockList = crTreeFindOverlappingBlocks(crf, chrom, start, end);
verbose(2, "Got %d overlapping blocks\n", slCount(blockList));
for (block = blockList; block != NULL; block = block->next)
    {
    verbose(2, "block->offset %llu, block->size %llu\n", block->offset, block->size);
    lineFileSeek(lf, block->offset, SEEK_SET);
    bits64 sizeUsed = 0;
    while (sizeUsed < block->size)
        {
	char *line;
	int size;
	if (!lineFileNext(lf, &line, &size))
	    errAbort("Couldn't read %s\n", lf->fileName);
	/* chopLine writes into its input, so parse a copy and keep 'line'
	 * intact for printing. */
	char *parsedLine = cloneString(line);
	char *row[3];
	if (chopLine(parsedLine, row) != ArraySize(row))
	    errAbort("Badly formatted line of %s\n%s", lf->fileName, line);
	char *bedChrom = row[0];
	bits32 bedStart = sqlUnsigned(row[1]);
	bits32 bedEnd = sqlUnsigned(row[2]);
	/* Blocks may also hold non-overlapping items, so re-check each one. */
	if (sameString(bedChrom, chrom) && rangeIntersection(bedStart, bedEnd, start, end) > 0)
	    fprintf(stdout, "%s\n", line);
	freeMem(parsedLine);
	sizeUsed += size;
	}
    }
/* Release everything acquired above, not just the tree file. */
slFreeList(&blockList);
crTreeFileClose(&crf);
lineFileClose(&lf);
}
Пример #8
0
void constExons(struct txGraph *graph, FILE *f)
/* Write out constitutive exons of graph to f, one tab-separated line each.
 * An exon is written when it has a hard start and a hard end, overlaps no
 * intron, has a refSourceAcc() accession and at least 10 pieces of evidence,
 * and overlaps no other edge of the graph. */
{
    /* Collect every intron range into a range tree. */
    struct rbTree *intronTree = rangeTreeNew();
    struct txEdge *edge;
    for (edge = graph->edgeList; edge != NULL; edge = edge->next)
    {
        if (edge->type != ggIntron)
            continue;
        rangeTreeAdd(intronTree, graph->vertices[edge->startIx].position,
                     graph->vertices[edge->endIx].position);
    }

    /* Walk the exons, discarding candidates as early as possible. */
    int exonId = 0;
    for (edge = graph->edgeList; edge != NULL; edge = edge->next)
    {
        if (edge->type != ggExon)
            continue;

        struct txVertex *startVertex = &graph->vertices[edge->startIx];
        struct txVertex *endVertex = &graph->vertices[edge->endIx];
        if (startVertex->type != ggHardStart || endVertex->type != ggHardEnd)
            continue;

        int start = startVertex->position;
        int end = endVertex->position;
        if (rangeTreeOverlaps(intronTree, start, end))
            continue;

        char *refSource = refSourceAcc(graph, edge);
        if (refSource == NULL || edge->evCount < 10)
            continue;

        /* Final scan: stop at the first other edge that overlaps us.
         * 'other' ends up NULL only when no such edge exists. */
        struct txEdge *other = graph->edgeList;
        while (other != NULL)
        {
            if (other != edge)
            {
                int otherStart = graph->vertices[other->startIx].position;
                int otherEnd = graph->vertices[other->endIx].position;
                if (rangeIntersection(otherStart, otherEnd, start, end) > 0)
                    break;
            }
            other = other->next;
        }
        if (other == NULL)
            fprintf(f, "%s\t%d\t%d\t%s.%d\t0\t%s\n",
                    graph->tName, start, end, refSource, ++exonId, graph->strand);
    }
    rangeTreeFree(&intronTree);
}
Пример #9
0
boolean pslOverlap(struct psl *a, struct psl *b)
/* Return TRUE if a and b overlap.  When the file-level doTarget flag is set
 * the comparison is on the target side (same tName, overlapping
 * tStart-tEnd); otherwise it is on the query side (qName, qStart-qEnd). */
{
if (doTarget)
    return sameString(a->tName, b->tName)
        && rangeIntersection(a->tStart, a->tEnd, b->tStart, b->tEnd) > 0;
return sameString(a->qName, b->qName)
    && rangeIntersection(a->qStart, a->qEnd, b->qStart, b->qEnd) > 0;
}
Пример #10
0
void addInterSize(void *item)
/* Callback: treat item as a struct simpleRange and add its
 * rangeIntersection() with the file-level interRange to interSize. */
{
struct simpleRange *range = item;
interSize += rangeIntersection(range->start, range->end,
                               interRange.start, interRange.end);
}
Пример #11
0
boolean hitsRegions(char *chrom, int start, int end, struct region *regionList)
/* Return TRUE if chrom:start-end overlaps at least one region on regionList,
 * FALSE otherwise (including when the list is empty). */
{
struct region *region = regionList;
while (region != NULL)
    {
    if (sameString(chrom, region->chrom))
        {
        if (rangeIntersection(start, end, region->start, region->end) > 0)
            return TRUE;
        }
    region = region->next;
    }
return FALSE;
}
Пример #12
0
static boolean breakUpIfOnDiagonal(struct block *blockList, boolean isRc,
	char *qName, char *tName, int qSize, int tSize,
	struct block *retBlockLists[], int maxBlockLists, int *retCount) 
/* If any blocks are on diagonal, remove the blocks and separate the lists 
 * of blocks before and after the diagonal. Store block list pointers in 
 * retBlockLists, the number of lists in retCount, and return TRUE if 
 * we found any blocks on diagonal so we know to rescore afterwards.
 * Blocks are only examined when qName and tName are the same string;
 * otherwise blockList is returned unchanged as the single entry of
 * retBlockLists.  A block counts as on-diagonal when its target range
 * overlaps its query range (the query range is first passed through
 * reverseIntRange with qSize when isRc is set).  On-diagonal blocks are
 * freed here.  Aborts if the number of lists would reach maxBlockLists.
 * Every resulting list is passed through removeFrayedEnds.  tSize is
 * not used. */
{
int blockListIndex = 0;
boolean brokenUp = FALSE;

retBlockLists[blockListIndex] = blockList;
if (sameString(qName, tName))
    {
    struct block *block = NULL, *lastBlock = NULL;
    int i = 0;
    for (block = blockList;  block != NULL;  block = block->next)
	{
	int qStart = block->qStart;
	int qEnd   = block->qEnd;
	/* The current list starts at this block only because the previous
	 * block was on the diagonal and was skipped over; nothing refers to
	 * that previous block any more, so free it now (it could not be freed
	 * earlier because its next pointer was still needed to advance). */
	if (lastBlock != NULL && block == retBlockLists[blockListIndex])
	    freez(&lastBlock);
	if (isRc)
	    reverseIntRange(&qStart, &qEnd, qSize);
	if (rangeIntersection(block->tStart, block->tEnd, qStart, qEnd) > 0)
	    {
	    brokenUp = TRUE;
	    /* On-diagonal block in the middle of the current list: cut the
	     * list just before it and move on to a new list slot. */
	    if (block != retBlockLists[blockListIndex])
		{
		assert(lastBlock != NULL);
		lastBlock->next = NULL;
		blockListIndex++;
		if (blockListIndex >= maxBlockLists)
		    errAbort("breakUpIfOnDiagonal: Too many fragmented block lists!");
		}
	    /* The current list (re)starts right after the on-diagonal block. */
	    retBlockLists[blockListIndex] = block->next;
	    }
	lastBlock = block;
	}
    /* The current list is empty when the final block was on the diagonal
     * (or blockList was empty): drop that slot and free the final block,
     * if there was one. */
    if (retBlockLists[blockListIndex] == NULL)
	{
	blockListIndex--;
	if (lastBlock != NULL)
	    freez(&lastBlock);
	}
    for (i=0;  i <= blockListIndex; i++)
	{
	retBlockLists[i] = removeFrayedEnds(retBlockLists[i]);
	}
    }
*retCount = blockListIndex + 1;
return brokenUp;
}
Пример #13
0
void bbiAddToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end, 
	bits32 validCount, double minVal, double maxVal, double sumData, double sumSquares,  
	int reduction, struct bbiSummary **pOutList)
/* Add data range to summary - putting it onto top of list if possible, otherwise
 * expanding list.
 * The range start-end (clipped to chromSize) carries validCount, sumData and
 * sumSquares.  It is spread over summaries that are 'reduction' bases wide
 * (the last one on a chromosome is clipped to chromSize); each summary
 * receives the share of those totals that corresponds to the fraction of the
 * range it covers, and has its minVal/maxVal widened to include minVal and
 * maxVal.  New summaries are pushed onto the head of *pOutList.  Calls
 * internalErr() if the current summary fails to overlap the range. */
{
struct bbiSummary *sum = *pOutList;
if (end > chromSize)	// Avoid pathological clipping situation on bad input
    end = chromSize;

/* Size of the whole (clipped) range.  This is fixed before the loop because
 * 'start' is advanced below: measuring the remaining size on each pass would
 * hand later summaries too large a share of the totals whenever the range
 * spans more than one summary. */
int itemSize = (start < end) ? (int)(end - start) : 0;

while (start < end)
    {
    /* See if need to allocate a new summary. */
    if (sum == NULL || sum->chromId != chromId || sum->end <= start)
        {
	struct bbiSummary *newSum;
	AllocVar(newSum);
	newSum->chromId = chromId;
	/* Start the new summary at 'start' unless the previous summary on
	 * this chromosome ended less than 'reduction' bases before it, in
	 * which case continue from where the previous one ended. */
	if (sum == NULL || sum->chromId != chromId || sum->end + reduction <= start)
	    newSum->start = start;
	else
	    newSum->start = sum->end;
	newSum->end = newSum->start + reduction;
	if (newSum->end > chromSize)
	    newSum->end = chromSize;
	newSum->minVal = minVal;
	newSum->maxVal = maxVal;
	sum = newSum;
	slAddHead(pOutList, sum);
	}

    /* Figure out amount of overlap between current summary and item */
    int overlap = rangeIntersection(start, end, sum->start, sum->end);
    if (overlap <= 0) 
	{
        warn("%u %u doesn't intersect %u %u, chromId %u chromSize %u", start, end, sum->start, sum->end, chromId, chromSize);
	internalErr();
	}
    double overlapFactor = (double)overlap/itemSize;

    /* Fold overlapping bits into output. */
    sum->validCount += overlapFactor * validCount;
    if (sum->minVal > minVal)
        sum->minVal = minVal;
    if (sum->maxVal < maxVal)
        sum->maxVal = maxVal;
    sum->sumData += overlapFactor * sumData;
    sum->sumSquares += overlapFactor * sumSquares;

    /* Advance over overlapping bits. */
    start += overlap;
    }
}
int bkCountOverlappingRange(struct binKeeper *bk, int start, int end)
/* Return biggest overlap of anything in binKeeper with given range, or 0 if
 * nothing in bk overlaps start-end. */
{
struct binElement *el, *list = binKeeperFind(bk, start, end);
int bestOverlap = 0;

for (el = list; el != NULL; el = el->next)
    {
    int overlap = rangeIntersection(el->start, el->end, start, end);
    if (overlap > bestOverlap)
        bestOverlap = overlap;
    }
/* binKeeperFind hands back a list the caller owns; release it. */
slFreeList(&list);
return bestOverlap;
}
Пример #15
0
struct edge *edgeFromConsensusOfEvidence(struct rbTree *vertexTree, struct evidence *evList,
	struct lm *lm)
/* Try to build one edge out of a list of overlapping evidence ranges.  The
 * edge start is the consensus vertex of all evidence starts and the edge end
 * is the consensus vertex of all evidence ends.  Evidence that overlaps the
 * resulting start-end range is moved onto the edge's evList (which relinks
 * those evidence items).  Scratch lists are allocated from lm; the edge
 * itself is allocated with AllocVar. */
{
struct sourceAndPos *starts = NULL, *ends = NULL;
int evCount = 0;
struct evidence *ev;

/* One start entry and one end entry per piece of evidence. */
for (ev = evList; ev != NULL; ev = ev->next)
    {
    boolean isTrusted = trustedSource(ev->lb->sourceType);
    struct sourceAndPos *startPos, *endPos;

    lmAllocVar(lm, startPos);
    startPos->position = ev->start;
    startPos->trustedSource = isTrusted;
    slAddHead(&starts, startPos);

    lmAllocVar(lm, endPos);
    endPos->position = ev->end;
    endPos->trustedSource = isTrusted;
    slAddHead(&ends, endPos);

    ++evCount;
    }

/* Consensus vertices: starts sorted forward, ends sorted in reverse. */
slSort(&starts, sourceAndPosCmp);
struct vertex *startVertex = consensusVertex(vertexTree, starts, evCount, ggSoftStart);
slSort(&ends, sourceAndPosCmpRev);
struct vertex *endVertex = consensusVertex(vertexTree, ends, evCount, ggSoftEnd);

struct edge *edge;
AllocVar(edge);
edge->start = startVertex;
edge->end = endVertex;
edge->next = NULL;

/* slAddHead rewrites ev->next, so remember the successor before moving. */
ev = evList;
while (ev != NULL)
    {
    struct evidence *following = ev->next;
    if (rangeIntersection(ev->start, ev->end, startVertex->position, endVertex->position) > 0)
        {
        slAddHead(&edge->evList, ev);
        }
    ev = following;
    }

return edge;
}
Пример #16
0
static int isContained (MrfRead *currRead, char *targetName, int targetStart, int targetEnd)
/* Return 1 if any block of currRead lies on targetName and overlaps
 * targetStart-targetEnd; return 0 otherwise. */
{
  int blockIx;

  for (blockIx = 0; blockIx < arrayMax (currRead->blocks); blockIx++) {
    MrfBlock *block = arrp (currRead->blocks,blockIx,MrfBlock);
    if (!strEqual (block->targetName,targetName)) {
      continue;
    }
    if (rangeIntersection (block->targetStart,block->targetEnd,targetStart,targetEnd) > 0 ) {
      return 1;
    }
  }
  return 0;
}
Пример #17
0
void addBigWigIntervalInfo(struct bbiFile *bbi, struct lm *lm, char *chrom, int start, int end,
    int *pSumSize, int *pSumCoverage, double *pSumVal)
/* Query the bigWig for chrom:start-end and add the results to the running
 * totals: *pSumSize grows by the size of the query, *pSumCoverage by the
 * number of query bases covered by returned intervals, and *pSumVal by each
 * interval's value times the bases it covers.  Intervals come out of lm. */
{
struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom, start, end, lm);
struct bbiInterval *iv = ivList;
*pSumSize += (end - start);
while (iv != NULL)
    {
    int covered = rangeIntersection(iv->start, iv->end, start, end);
    if (covered > 0)
        {
        *pSumCoverage += covered;
        *pSumVal += covered * iv->val;
        }
    iv = iv->next;
    }
}
Пример #18
0
static struct psl *mapCDnaCDnaAln(struct hapRegions *hr, struct cDnaAlign *refAln, struct psl *mappedHap)
/* Create a cDNA-to-cDNA alignment from mappedHap and refAln.  Mapping is only
 * attempted when both alignments are on the same target and their target
 * ranges overlap.  mappedHap is swapped for the call to pslTransMap and
 * swapped back afterwards.  If hr->hapRefCDnaFh is open, a successful result
 * is also written there.  Returns NULL if it can't be mapped. */
{
    struct psl *cDnaCDnaAln = NULL;
    /* rangeIntersection is negative, not zero, for disjoint ranges, so it
     * must be compared against 0 rather than used as a boolean. */
    if (sameString(refAln->psl->tName, mappedHap->tName)
            && rangeIntersection(refAln->psl->tStart, refAln->psl->tEnd,
                                 mappedHap->tStart, mappedHap->tEnd) > 0)
    {
        pslSwap(mappedHap, FALSE);
        cDnaCDnaAln = pslTransMap(pslTransMapNoOpts, refAln->psl, mappedHap);
        pslSwap(mappedHap, FALSE);
        if ((hr->hapRefCDnaFh != NULL) && (cDnaCDnaAln != NULL))
            cDnaAlignPslOut(cDnaCDnaAln, refAln->alnId, hr->hapRefCDnaFh);
    }
    return cDnaCDnaAln;
}
Пример #19
0
struct range *rangeTreeMaxOverlapping(struct rbTree *tree, int start, int end)
/* Return the item in tree with the largest overlap with start-end, or NULL
 * if nothing overlaps.  Not thread safe.  Trashes list used by
 * rangeTreeAllOverlapping.  The returned item has its next pointer cleared. */
{
struct range *winner = NULL;
int winnerOverlap = 0;
struct range *candidate = rangeTreeAllOverlapping(tree, start, end);
while (candidate != NULL)
    {
    int overlap = rangeIntersection(candidate->start, candidate->end, start, end);
    if (overlap > winnerOverlap)
        {
        winnerOverlap = overlap;
        winner = candidate;
        }
    candidate = candidate->next;
    }
if (winner != NULL)
    winner->next = NULL; /* could be set by calls to List functions */
return winner;
}
Пример #20
0
struct protFeature *highestScoringFeature(struct protFeature *start, struct protFeature *end,
	int rangeStart, int rangeEnd)
/* Walk the list from start up to (but not including) end and return the
 * feature with the highest score among those overlapping
 * rangeStart-rangeEnd.  Returns NULL if none overlaps or none scores above
 * -1.0. */
{
struct protFeature *winner = NULL;
double topScore = -1.0;
struct protFeature *feat = start;
while (feat != end)
    {
    if (rangeIntersection(rangeStart, rangeEnd, feat->start, feat->end) > 0
        && feat->score > topScore)
        {
        winner = feat;
        topScore = feat->score;
        }
    feat = feat->next;
    }
return winner;
}
void writeAnswer(struct clone *cloneList, char *fileName)
/* Write every clone on cloneList to fileName with dumpClone, assuming the
 * list is sorted.  A blank line is written after the last clone and whenever
 * the next clone does not overlap the span from the current clone's start to
 * the furthest end seen so far. */
{
FILE *f = mustOpen(fileName, "w");
int furthestEnd = 0;
struct clone *clone = cloneList;

while (clone != NULL)
    {
    struct clone *following = clone->next;
    int groupEnds;

    dumpClone(clone, f);
    if (clone->end > furthestEnd)
        furthestEnd = clone->end;

    if (following == NULL)
        groupEnds = 1;
    else
        groupEnds = (rangeIntersection(clone->start, furthestEnd, following->start, following->end) <= 0);
    if (groupEnds)
        fprintf(f, "\n");

    clone = following;
    }
carefulClose(&f);
}
void removeOutside(int start, int end, struct segment *seg)
/* Remove parts of seg outside of range start-end.  Genes that do not overlap
 * the range are dropped from seg->geneList; genes that do overlap keep their
 * relative order, have their features trimmed with removeFeaturesOutside and
 * their bounds recalculated.  seg->suboptList is trimmed the same way. */
{
struct genScanGene *gene, *nextGene, *geneList = NULL;

for (gene = seg->geneList; gene != NULL; gene = nextGene)
    {
    nextGene = gene->next;	/* slAddHead below rewrites gene->next */
    /* rangeIntersection is negative, not zero, for disjoint ranges, so it
     * must be compared against 0 rather than used as a boolean. */
    if (rangeIntersection(start, end, gene->start, gene->end) > 0)
        {
	slAddHead(&geneList, gene);
	gene->featureList = removeFeaturesOutside(start, end, gene->featureList);
	calcGeneBounds(gene);
	}
    }
slReverse(&geneList);
seg->geneList = geneList;
seg->suboptList = removeFeaturesOutside(start, end, seg->suboptList);
}
static bits32 bbiSummarySlice(struct bbiFile *bbi, bits32 baseStart, bits32 baseEnd, 
	struct bbiSummary *sumList, struct bbiSummaryElement *el)
/* Combine the summaries on sumList that overlap baseStart-baseEnd into el.
 * Each summary contributes its validCount, sumData and sumSquares scaled by
 * the fraction of the summary that lies inside the range; min and max are
 * taken over the overlapping summaries and the first summary on the list.
 * The scan stops at the first summary starting at or after baseEnd.  el is
 * only written when the resulting valid count is above zero.  Returns that
 * valid count.  bbi is not used. */
{
if (sumList == NULL)
    return 0;

bits32 validCount = 0;
double lowest = sumList->minVal;
double highest = sumList->maxVal;
double dataTotal = 0, squaresTotal = 0;
struct bbiSummary *sum = sumList;

while (sum != NULL && sum->start < baseEnd)
    {
    int overlap = rangeIntersection(baseStart, baseEnd, sum->start, sum->end);
    if (overlap > 0)
        {
        double overlapFactor = (double)overlap / (sum->end - sum->start);
        validCount += sum->validCount * overlapFactor;
        dataTotal += sum->sumData * overlapFactor;
        squaresTotal += sum->sumSquares * overlapFactor;
        if (highest < sum->maxVal)
            highest = sum->maxVal;
        if (lowest > sum->minVal)
            lowest = sum->minVal;
        }
    sum = sum->next;
    }

if (validCount > 0)
    {
    el->validCount = validCount;
    el->minVal = lowest;
    el->maxVal = highest;
    el->sumData = dataTotal;
    el->sumSquares = squaresTotal;
    }
return validCount;
}
static bits32 bbiIntervalSlice(struct bbiFile *bbi, bits32 baseStart, bits32 baseEnd, 
	struct bbiInterval *intervalList, struct bbiSummaryElement *el)
/* Combine the intervals on intervalList that overlap baseStart-baseEnd into
 * el.  Each interval is weighted by how much of it falls inside the range;
 * the weights accumulate into the valid count, and the interval value and
 * its square, times the weight, accumulate into sumData and sumSquares.  Min
 * and max are taken over the overlapping intervals and the first interval on
 * the list.  The scan stops at the first interval starting at or after
 * baseEnd.  When intervalList is non-NULL el is always written, with the
 * valid count rounded.  Returns the rounded valid count.  bbi is not used. */
{
if (intervalList == NULL)
    return 0;

double validCount = 0;
double dataTotal = 0, squaresTotal = 0;
double lowest = intervalList->val;
double highest = intervalList->val;
struct bbiInterval *interval = intervalList;

while (interval != NULL && interval->start < baseEnd)
    {
    int overlap = rangeIntersection(baseStart, baseEnd, interval->start, interval->end);
    if (overlap > 0)
        {
        int intervalSize = interval->end - interval->start;
        double overlapFactor = (double)overlap / intervalSize;
        double intervalWeight = intervalSize * overlapFactor;
        validCount += intervalWeight;
        dataTotal += interval->val * intervalWeight;
        squaresTotal += interval->val * interval->val * intervalWeight;
        if (highest < interval->val)
            highest = interval->val;
        if (lowest > interval->val)
            lowest = interval->val;
        }
    interval = interval->next;
    }

el->validCount = round(validCount);
el->minVal = lowest;
el->maxVal = highest;
el->sumData = dataTotal;
el->sumSquares = squaresTotal;
return round(validCount);
}
Пример #25
0
void check(struct sqlConnection *conn, char *table)
/* Read chromStart,chromEnd of every row of table and report to stderr any
 * row that starts before the previous row, or whose range (widened by one
 * base at the start) overlaps the previous row's range.  Always finishes by
 * calling errAbort. */
{
char query[256];
char **row;
int prevStart = -1, prevEnd = -1;

sqlSafef(query, sizeof query, "select chromStart,chromEnd from %s", table);
struct sqlResult *sr = sqlGetResult(conn, query);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    int start = atoi(row[0]);
    int end = atoi(row[1]);
    if (start < prevStart)
        fprintf(stderr,"Out of order: %d,%d\n", prevStart, start);
    if (rangeIntersection(prevStart, prevEnd, start-1, end) > 0)
        fprintf(stderr,"Overlapping: (%d %d) (%d %d)\n", prevStart, prevEnd, start, end);
    prevStart = start;
    prevEnd = end;
    }
sqlFreeResult(&sr);
errAbort("All for now");
}
Пример #26
0
boolean agxIsSubset(struct altGraphX *query, struct altGraphX *target)
/** Return TRUE if query is just a subset of target, FALSE otherwise.
 * query must lie within target's tStart-tEnd on the same strand, and every
 * exon edge of query must overlap at least one exon edge of target. */
{
int *qPos = query->vPositions, *tPos = target->vPositions;
int *qStarts = query->edgeStarts, *tStarts = target->edgeStarts;
int *qEnds = query->edgeEnds, *tEnds = target->edgeEnds;
int qEdgeCount = query->edgeCount, tEdgeCount = target->edgeCount;
int qEdge, tEdge;

if (query->tStart < target->tStart)
    return FALSE;
if (query->tEnd > target->tEnd)
    return FALSE;
if (query->strand[0] != target->strand[0])
    return FALSE;

for (qEdge = 0; qEdge < qEdgeCount; qEdge++)
    {
    /* Only exons are compared. */
    if (altGraphXEdgeType(query, qEdge) != ggExon)
        continue;

    /* Stop at the first target exon overlapping this query exon. */
    for (tEdge = 0; tEdge < tEdgeCount; tEdge++)
        {
        if (altGraphXEdgeType(target, tEdge) != ggExon)
            continue;
        if (rangeIntersection(qPos[qStarts[qEdge]], qPos[qEnds[qEdge]],
                              tPos[tStarts[tEdge]], tPos[tEnds[tEdge]]) > 0)
            break;
        }
    /* Running off the end of the target edges means nothing matched. */
    if (tEdge >= tEdgeCount)
        return FALSE;
    }
return TRUE;
}
struct sizeList *unionSizeLists(struct sizeList *a, struct sizeList *b, FILE *err)
{
struct sizeList *s, *t, *u, *c=sizeListClone(a), *d=sizeListClone(b);
boolean didChange=TRUE;
int mergeGaps=20000;
if (a == NULL)
    return b;
while (didChange)
    {
    didChange=FALSE;
    for (s = c; s != NULL; s = s->next)
	for (t = d; t != NULL; t = t->next)
	    {
	    if (t->chrom == NULL || t->name == NULL)
		continue;
//	    printf("%s/%s.%d-%d\t%s/%s.%d-%d\t", s->name, s->chrom, s->chromStart, s->chromEnd, t->name, t->chrom, t->chromStart, t->chromEnd);
	    if ( !strncmp(s->name, t->name, 6) && sameString(s->chrom, t->chrom) )
		if (rangeIntersection(s->chromStart,s->chromEnd,t->chromStart,t->chromEnd)+mergeGaps>0)
		    {
		    s->chromStart = min(s->chromStart,t->chromStart);
		    s->chromEnd   = max(s->chromEnd,  t->chromEnd);
		    t->chrom  = t->name = NULL; // it would be better to remove the element here
		    didChange = TRUE;
		    continue;
		    }
	    }
    }
for (t = d; t != NULL; t = t->next)
    if (t->name != NULL && t->chrom!=NULL)
	{
	u = sizeListNew(t->chrom, t->chromStart, t->chromEnd, t->name);
	slAddTail(c, u);
	fprintf(err, "%s\t%d\t%d\t%s\n", t->chrom, t->chromStart, t->chromEnd, t->name);
	}
return c;
}
boolean isCoding(struct bed *bed)
/* Return TRUE if bed has a coding region, that is if its thick range
 * (thickStart-thickEnd) overlaps chromStart-chromEnd. */
{
int codingBases = rangeIntersection(bed->chromStart, bed->chromEnd,
                                    bed->thickStart, bed->thickEnd);
return codingBases > 0;
}
Пример #29
0
static void rBoxJoin(struct boxRef *refList, 
	int qStart, int qEnd, int tStart, int tEnd)
/* Recursively cluster boxes. */
{
int boxCount = slCount(refList);

if (boxCount <= 1)
    {
    /* Easy: no merging required. */
    }
else if (boxCount == 2)
    {
    /* Decide if pair overlaps and if so merge. */
    struct box *a = refList->box;
    struct box *b = refList->next->box;
    if (rangeIntersection(a->in->qStart, a->in->qEnd, b->in->qStart, b->in->qEnd) > 0 &&
        rangeIntersection(a->in->tStart, a->in->tEnd, b->in->tStart, b->in->tEnd) > 0 )
	{
	mergeClusters(a->cluster, b->cluster);
	}
    else
        {
	/* Two non-overlapping boxes, we don't have to do anything. */
	}
    }
else if (allStartBy(refList, qStart, tStart))
    {
    /* If everybody contains the upper left corner, then they all can 
     * be merged.   This is the route taken often by clumps with lots
     * of overlap. */
    struct cluster *aCluster = refList->box->cluster;
    struct boxRef *ref;
    for (ref = refList->next; ref != NULL; ref = ref->next)
        {
	struct cluster *bCluster = ref->box->cluster;
	mergeClusters(aCluster, bCluster);
	}
    }
else if (allSameCluster(refList))
    {
    /* Everything is in the same cluster, no action required. */
    }
else
    {
    /* We can't yet figure out clumping, so break
     * up our window in two along larger dimension and
     * recurse on both subwindows. */
    struct boxRef *list1 = NULL, *list2 = NULL, *ref, *next;
    if (qEnd - qStart > tEnd - tStart)
        {
	int mid = (qStart + qEnd)>>1;
	for (ref = refList; ref != NULL; ref = next)
	    {
	    struct box *box = ref->box;
	    next = ref->next;
	    if (box->in->qEnd <= mid)
	        {
		slAddHead(&list1, ref);
		}
	    else if (box->in->qStart >= mid)
	        {
		slAddHead(&list2, ref);
		}
	    else
	        {
		/* Box crosses boundary, have to put it on both lists. */
		slAddHead(&list1, ref);
		lmAllocVar(lm, ref);
		ref->box = box;
		slAddHead(&list2, ref);
		}
	    }
	rBoxJoin(list1, qStart, mid, tStart, tEnd);
	rBoxJoin(list2, mid, qEnd, tStart, tEnd);
	}
    else
        {
Пример #30
0
void ggcChrom(struct chromGenes *chrom, char *axtFile, 
	struct ggcInfo *g, struct hash *restrictHash, 
	FILE *fParts)
/* Tabulate alignment matches and coverage on one chromosome.
 *
 * Reads every alignment in axtFile, marks which target bases match the
 * query and which are covered by an alignment, then for each gene in
 * chrom->geneList tallies those per-base marks into the accumulators
 * held in g (UTRs, CDS, up/downstream, transcription and translation
 * start/end windows, coding exon classes, splice sites and introns).
 *
 *   chrom        - supplies name, size and geneList.
 *   axtFile      - path of the alignment file to read.
 *   g            - accumulators plus the closeSize, baseUp and baseDown
 *                  settings used to size the windows.
 *   restrictHash - if non-NULL, genes whose name is not in the hash are
 *                  skipped.
 *   fParts       - if non-NULL, one tab-separated line per used gene is
 *                  written to it (with a header line before the first).
 *
 * Side effects: prints the chromosome name and size to stdout and bumps
 * the totalGenes, reviewedGenes and genesUsed counters that are defined
 * outside this function.  Aborts via errAbort if an alignment extends
 * past chrom->size or is on the target minus strand. */
{
struct lineFile *lf = lineFileOpen(axtFile, TRUE);
bool *hits, *covers;
struct axt *axt;
struct genePred *gp;
int closeSize = g->closeSize;
int closeHalf = closeSize/2;

/* Build up two per-base arrays over the target chromosome:
 *   hits[i]   - TRUE where target and query bases match, ignoring case.
 *   covers[i] - 1 where the target base aligns to a gap in the query,
 *               2 where it aligns to a query base.
 * Bases no alignment touches keep whatever AllocArray put there
 * (presumably zero - TODO confirm).
 * NOTE(review): storing 2 in covers relies on 'bool' being wide enough
 * to hold it (i.e. not C99 _Bool) - confirm against the typedef. */
AllocArray(hits, chrom->size);
AllocArray(covers, chrom->size);
printf("%s (%d bases)\n", chrom->name, chrom->size);
while ((axt = axtRead(lf)) != NULL)
    {
    int tPos = axt->tStart;
    int symCount = axt->symCount, i;
    char t, q, *tSym = axt->tSym, *qSym = axt->qSym;

    if (axt->tEnd > chrom->size)
        errAbort("tEnd %d, chrom size %d in %s", 
		axt->tEnd, chrom->size, axtFile);
    if (axt->tStrand == '-')
        errAbort("Can't handle minus strand on target in %s", axtFile);

    /* Walk the alignment columns; only columns with a target base
     * advance the target position. */
    for (i=0; i<symCount; ++i)
        {
        t = tSym[i];
        if (t != '-')
            {
            q = qSym[i];
            /* Cast before toupper: passing a negative plain char to a
             * <ctype.h> function is undefined behaviour. */
            if (toupper((unsigned char)t) == toupper((unsigned char)q))
                hits[tPos] = TRUE;
            if (q == '-')
                covers[tPos] = 1;
            else
                covers[tPos] = 2;
            ++tPos;
            }
        }
    axtFree(&axt);
    }

for (gp = chrom->geneList; gp != NULL; gp = gp->next)
    {
    int exonIx;
    int utr3Size = 0, utr5Size = 0, cdsAllSize = 0;
    int utr3Pos = 0, utr5Pos = 0, cdsAllPos = 0;
    bool *utr3Hits = NULL, *utr3Covers = NULL;
    bool *utr5Hits = NULL, *utr5Covers = NULL;
    bool *cdsAllHits = NULL, *cdsAllCovers = NULL;
    bool isRev = (gp->strand[0] == '-');

    /* Filter out genes not in restrict hash if any. */
    ++totalGenes;
    if (restrictHash != NULL)
        {
        if (!hashLookup(restrictHash, gp->name))
            continue;
        }
    ++reviewedGenes;

    /* Filter out genes without meaningful UTRs: both the stretch before
     * the CDS and the stretch after it must span at least half a
     * "close" window. */
    if (gp->cdsStart - gp->txStart < closeHalf || 
        gp->txEnd - gp->cdsEnd < closeHalf)
        continue;
    ++genesUsed;

    /* Total up UTR and CDS sizes.  The exon part left of cdsStart is
     * 5' UTR on the plus strand and 3' UTR on the minus strand; the
     * part right of cdsEnd is the opposite. */
    for (exonIx=0; exonIx<gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        int oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd);
        if (oneCds > 0)
            cdsAllSize += oneCds;
        if (eStart < gp->cdsStart)
            {
            int utrStart = eStart;
            int utrEnd = min(gp->cdsStart, eEnd);
            int utrSize = utrEnd - utrStart;
            if (isRev)
                utr3Size += utrSize;
            else
                utr5Size += utrSize;
            }
        if (eEnd > gp->cdsEnd)
            {
            int utrStart = max(gp->cdsEnd, eStart);
            int utrEnd = eEnd;
            int utrSize = utrEnd - utrStart;
            if (isRev)
                utr5Size += utrSize;
            else
                utr3Size += utrSize;
            }
        }

    /* Condense hits from UTRs and CDSs: allocate one contiguous buffer
     * per region, then copy each exon's slice of hits/covers into it in
     * genomic order.  This second pass must partition the exons exactly
     * like the sizing pass above; the asserts below check that. */
    if (utr5Size > 0)
        {
        AllocArray(utr5Hits, utr5Size);
        AllocArray(utr5Covers, utr5Size);
        }
    if (utr3Size > 0)
        {
        AllocArray(utr3Hits, utr3Size);
        AllocArray(utr3Covers, utr3Size);
        }
    if (cdsAllSize > 0)
        {
        AllocArray(cdsAllHits, cdsAllSize);
        AllocArray(cdsAllCovers, cdsAllSize);
        }
    for (exonIx=0; exonIx<gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        int oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd);
        if (oneCds > 0)
            {
            /* Coding part of this exon starts at the later of the exon
             * start and the CDS start. */
            int cdsStart = eStart;
            if (cdsStart < gp->cdsStart)
                cdsStart = gp->cdsStart;
            memcpy(cdsAllHits + cdsAllPos, hits + cdsStart, oneCds * sizeof(*hits));
            memcpy(cdsAllCovers + cdsAllPos, covers + cdsStart, oneCds * sizeof(*covers));
            cdsAllPos += oneCds;
            }
        if (eStart < gp->cdsStart)
            {
            int utrStart = eStart;
            int utrEnd = min(gp->cdsStart, eEnd);
            int utrSize = utrEnd - utrStart;
            if (isRev)
                {
                memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr3Pos += utrSize;
                }
            else
                {
                memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr5Pos += utrSize;
                }
            }
        if (eEnd > gp->cdsEnd)
            {
            int utrStart = max(gp->cdsEnd, eStart);
            int utrEnd = eEnd;
            int utrSize = utrEnd - utrStart;
            if (isRev)
                {
                memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr5Pos += utrSize;
                }
            else
                {
                memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr3Pos += utrSize;
                }
            }
        }
    assert(utr3Pos == utr3Size);
    assert(utr5Pos == utr5Size);
    assert(cdsAllPos == cdsAllSize);

    tallyHits(&g->utr5, utr5Hits, utr5Covers, utr5Size, isRev);
    tallyHits(&g->utr3, utr3Hits, utr3Covers, utr3Size, isRev);
    tallyHits(&g->cdsAll, cdsAllHits, cdsAllCovers, cdsAllSize, isRev);

    /* Optionally write out file with gene by gene info. */
    if (fParts != NULL)
        {
        /* Write header line first time through.  The flag is static, so
         * the header is emitted once per process, not once per call. */
        static boolean firstTime = TRUE;
        if (firstTime)
            {
            firstTime = FALSE;
            fprintf(fParts, "#accession\tsize_5\tali_5\tmatch_5\tsize_c\tali_c\tmatch_c\tsize_3\tali_3\tmatch_3\n");
            }
        fprintf(fParts, "%s\t", gp->name);
        fprintf(fParts, "%d\t%d\t%d\t", utr5Size, 
                countBools(utr5Covers, utr5Size),
                countBools(utr5Hits, utr5Size));
        fprintf(fParts, "%d\t%d\t%d\t", cdsAllSize, 
                countBools(cdsAllCovers, cdsAllSize),
                countBools(cdsAllHits, cdsAllSize));
        fprintf(fParts, "%d\t%d\t%d\n", utr3Size, 
                countBools(utr3Covers, utr3Size),
                countBools(utr3Hits, utr3Size));
        }

    /* Tally upstream/downstream hits, plus closeSize-wide windows
     * centered on txStart (s1..e1) and txEnd (s2..e2).  On the minus
     * strand the genomic txStart is the transcript's end, so the
     * accumulators are swapped.
     * NOTE(review): these ranges can extend before 0 or past the
     * chromosome end; presumably tallyInRange clips them using the
     * chrom->size argument - confirm. */
        {
        int s1 = gp->txStart - closeHalf;
        int e1 = s1 + closeSize;
        int s2 = gp->txEnd - closeHalf;
        int e2 = s2 + closeSize;
        if (isRev)
            {
            tallyInRange(&g->down, hits, covers, chrom->size, gp->txStart - g->baseDown,
                gp->txStart, isRev);
            tallyInRange(&g->up, hits, covers, chrom->size, gp->txEnd, 
                gp->txEnd + g->baseUp, isRev);
            tallyInRange(&g->txEnd, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->txStart, hits, covers, chrom->size, s2, e2, isRev);
            }
        else
            {
            tallyInRange(&g->up, hits, covers, chrom->size, gp->txStart - g->baseUp,
                gp->txStart, isRev);
            tallyInRange(&g->down, hits, covers, chrom->size, gp->txEnd, 
                gp->txEnd + g->baseDown, isRev);
            tallyInRange(&g->txStart, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->txEnd, hits, covers, chrom->size, s2, e2, isRev);
            }
        }

    /* Tally hits in coding exons, classified by how the exon sits
     * relative to the CDS.  The order of the tests matters: the
     * single-exon case must be ruled out before the others. */
    for (exonIx=0; exonIx < gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        /* Single coding exon: exon contains the whole CDS. */
        if (eStart <= gp->cdsStart && eEnd >= gp->cdsEnd)
            {
            eStart = gp->cdsStart;
            eEnd = gp->cdsEnd;
            tallyInRange(&g->cdsSingle, hits, covers, chrom->size,
                        eStart, eEnd, isRev);
            }
        /* Initial coding exon (in genomic order): straddles cdsStart.
         * Also tally a closeSize window centered on cdsStart. */
        else if (eStart < gp->cdsStart && eEnd > gp->cdsStart)
            {
            int cs = gp->cdsStart - closeHalf;
            int ce = cs + closeSize;
            eStart = gp->cdsStart;
            if (isRev)
                {
                tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsLast, hits, covers, chrom->size, 
                        eStart, eEnd, isRev);
                }
            else
                {
                tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsFirst, hits, covers, chrom->size, 
                        eStart, eEnd, isRev);
                }
            }
        /* Final coding exon (in genomic order): straddles cdsEnd.
         * Also tally a closeSize window centered on cdsEnd. */
        else if (eStart < gp->cdsEnd && eEnd > gp->cdsEnd)
            {
            int cs = gp->cdsEnd - closeHalf;
            int ce = cs + closeSize;
            eEnd = gp->cdsEnd;
            if (isRev)
                {
                tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsFirst, hits, covers, chrom->size, 
                        eStart, eEnd, isRev);
                }
            else
                {
                tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsLast, hits, covers, chrom->size, 
                        eStart, eEnd, isRev);
                }
            }
        /* Middle (but not only) coding exon: wholly inside the CDS. */
        else if (eStart >= gp->cdsStart && eEnd <= gp->cdsEnd)
            {
            tallyInRange(&g->cdsMiddle, hits, covers, chrom->size, eStart, eEnd, isRev);
            }
        /* Any remaining exon (e.g. one lying wholly outside the CDS)
         * is deliberately not tallied here. */
        }

    /* Tally hits in introns and splice sites.  Each intron runs from
     * the end of one exon to the start of the next; closeSize windows
     * are centered on both of its ends.  The genomically-left end is
     * the 5' splice site on the plus strand and the 3' one on the
     * minus strand. */
    for (exonIx=1; exonIx<gp->exonCount; ++exonIx)
        {
        int iStart = gp->exonEnds[exonIx-1];
        int iEnd = gp->exonStarts[exonIx];
        int s1 = iStart - closeHalf;
        int e1 = s1 + closeSize;
        int s2 = iEnd - closeHalf;
        int e2 = s2 + closeSize;
        if (isRev)
            {
            tallyInRange(&g->splice3, hits, covers, chrom->size, 
                    s1, e1, isRev);
            tallyInRange(&g->splice5, hits, covers, chrom->size, 
                    s2, e2, isRev);
            }
        else
            {
            tallyInRange(&g->splice5, hits, covers, chrom->size, 
                    s1, e1, isRev);
            tallyInRange(&g->splice3, hits, covers, chrom->size, 
                    s2, e2, isRev);
            }
        tallyInRange(&g->intron, hits, covers, chrom->size, iStart, iEnd, isRev);
        }

    /* Release the per-gene condensed buffers (some may still be NULL
     * if the corresponding region had zero size). */
    freez(&utr5Hits);
    freez(&utr3Hits);
    freez(&cdsAllHits);
    freez(&utr5Covers);
    freez(&utr3Covers);
    freez(&cdsAllCovers);
    }
freez(&hits);
freez(&covers);
lineFileClose(&lf);
}