int bedFirstCdsSize(struct bed *bed)
/* Return the number of coding bases in the first coding exon.  "First" is in
 * transcription order: blocks are walked from the last one backwards when the
 * strand is '-', and from block 0 forwards otherwise.  The answer is the
 * overlap of that block with thickStart-thickEnd; 0 is returned when no block
 * overlaps the thick region at all. */
{
int chromStart = bed->chromStart;
int isMinus = (bed->strand[0] == '-');
int step = isMinus ? -1 : 1;
int ix = isMinus ? bed->blockCount - 1 : 0;
int remaining;

/* One loop serves both strands: only the starting index and direction differ. */
for (remaining = bed->blockCount; remaining > 0; --remaining, ix += step)
    {
    int blockStart = chromStart + bed->chromStarts[ix];
    int blockEnd = blockStart + bed->blockSizes[ix];
    int codingBases = rangeIntersection(blockStart, blockEnd,
                                        bed->thickStart, bed->thickEnd);
    if (codingBases > 0)
        return codingBases;
    }
return 0;
}
int orthoScore(struct orthoCdsArray *ortho, struct cdsEvidence *orf)
/* Return a score of 1 per real base in the overlapping orthologous CDS.
 * The ORF is sampled every third position to find the orthologous CDS entry
 * with the largest overlap; that entry's range is clipped to the ORF, and each
 * position inside it whose base is neither '.' nor '-' adds one point. */
{
int bestOverlap = 0;
int bestStart = -1, bestEnd = -1;
int pos;
int realBases = 0;

/* Pick the orthologous CDS that shares the most bases with the ORF. */
for (pos = orf->start; pos < orf->end; pos += 3)
    {
    struct orthoCds *candidate = &ortho->cdsArray[pos];
    int overlap = rangeIntersection(candidate->start, candidate->end,
                                    orf->start, orf->end);
    if (overlap > bestOverlap)
        {
        bestOverlap = overlap;
        bestStart = candidate->start;
        bestEnd = candidate->end;
        }
    }

/* Clip the winner to the ORF.  If nothing overlapped, bestEnd stays at -1
 * after clipping, so the counting loop below only runs if orf->start is
 * below -1. */
bestStart = max(bestStart, orf->start);
bestEnd = min(bestEnd, orf->end);

for (pos = bestStart; pos < bestEnd; ++pos)
    {
    char base = ortho->cdsArray[pos].base;
    if (base == '.' || base == '-')
        continue;
    realBases += 1;
    }
return realBases;
}
static void addCluster(struct lm *lm, struct peakItem *itemList, int start, int end, struct peakCluster **pList) /* Make cluster of all items that overlap start/end, and put it on list. */ { struct peakCluster *cluster; lmAllocVar(lm, cluster); double score = 0.0; double maxSubScore = 0.0; struct slRef *refList = NULL, *ref; struct peakItem *item; for (item = itemList; item != NULL; item = item->next) { if (rangeIntersection(start, end, item->chromStart, item->chromEnd) > 0) { lmAllocVar(lm, ref); ref->val = item; slAddHead(&refList, ref); score += item->score; if (item->score > maxSubScore) maxSubScore = item->score; } } slReverse(&refList); cluster->chrom = itemList->chrom; cluster->chromStart = start; cluster->chromEnd = end; cluster->itemRefList = refList; cluster->score = score; cluster->maxSubScore = maxSubScore; slAddHead(pList, cluster); }
static int isContained (MrfRead *currRead)
/* Return 1 if any alignment block of currRead overlaps, by at least one base,
   an exon (sub-interval) of any transcript that intervalFind reports for the
   block's target region.  Return 0 otherwise. */
{
  int blockIdx;

  for (blockIdx = 0; blockIdx < arrayMax (currRead->blocks); blockIdx++) {
    MrfBlock *block = arrp (currRead->blocks,blockIdx,MrfBlock);
    Array transcripts = intervalFind_getOverlappingIntervals (block->targetName,block->targetStart,block->targetEnd);
    int transcriptIdx;

    for (transcriptIdx = 0; transcriptIdx < arrayMax (transcripts); transcriptIdx++) {
      Interval *transcript = arru (transcripts,transcriptIdx,Interval*);
      int exonIdx;

      for (exonIdx = 0; exonIdx < arrayMax (transcript->subIntervals); exonIdx++) {
        SubInterval *exon = arrp (transcript->subIntervals,exonIdx,SubInterval);

        /* The first exon hit settles the question. */
        if (rangeIntersection (block->targetStart,block->targetEnd,exon->start,exon->end) > 0) {
          return 1;
        }
      }
    }
  }
  return 0;
}
void bbiIntervalCorrelatePair(struct bbiInterval *a, struct bbiInterval *b, struct correlate *c)
/* Fold the overlapping part of intervals a and b into the running
 * correlation c.  The clamped values of the two intervals are added with a
 * weight equal to the number of bases they share.  The intervals must
 * overlap; this is asserted. */
{
int sharedBases = rangeIntersection(a->start, a->end, b->start, b->end);
assert(sharedBases > 0);
correlateNextMulti(c, clamp(a->val), clamp(b->val), sharedBases);
}
boolean isSoftExon(struct altGraphX *agx, int edge) /* Return TRUE if edge is an exon and has a soft start or soft end. */ { int *vPos = agx->vPositions; unsigned char *vT = agx->vTypes; int *starts = agx->edgeStarts; int *ends = agx->edgeEnds; boolean soft = FALSE; int i; if(getSpliceEdgeType(agx, edge) != ggExon) return FALSE; else if(vT[starts[edge]] == ggSoftStart || vT[ends[edge]] == ggSoftEnd) { soft = TRUE; if(!strict) { for(i = 0; i < agx->edgeCount; i++) { if(i == edge) continue; if(isHardExon(agx, i) && rangeIntersection(vPos[starts[edge]], vPos[ends[edge]], vPos[starts[i]], vPos[ends[i]]) > 0) { return FALSE; } } } } return soft; }
void rt1dFind(char *tabFile, char *treeFile, char *chrom, bits32 start, bits32 end)
/* rt1dFind - find items in 1-D range tree.
 * Looks up the file blocks of tabFile that the index in treeFile reports as
 * possibly overlapping chrom:start-end, reads each line of those blocks, and
 * prints to stdout every line whose first three columns (chrom, start, end)
 * are on chrom and overlap start-end by at least one base.
 * Aborts if a line can't be read or does not have exactly three words. */
{
struct lineFile *lf = lineFileOpen(tabFile, TRUE);
struct crTreeFile *crf = crTreeFileOpen(treeFile);
struct fileOffsetSize *block, *blockList = crTreeFindOverlappingBlocks(crf, chrom, start, end);
verbose(2, "Got %d overlapping blocks\n", slCount(blockList));
for (block = blockList; block != NULL; block = block->next)
    {
    verbose(2, "block->offset %llu, block->size %llu\n", block->offset, block->size);
    lineFileSeek(lf, block->offset, SEEK_SET);
    bits64 sizeUsed = 0;
    while (sizeUsed < block->size)
        {
        char *line;
        int size;
        if (!lineFileNext(lf, &line, &size))
            errAbort("Couldn't read %s\n", lf->fileName);
        /* Chop a copy so the original line can still be printed intact. */
        char *parsedLine = cloneString(line);
        char *row[3];
        if (chopLine(parsedLine, row) != ArraySize(row))
            errAbort("Badly formatted line of %s\n%s", lf->fileName, line);
        char *bedChrom = row[0];
        bits32 bedStart = sqlUnsigned(row[1]);
        bits32 bedEnd = sqlUnsigned(row[2]);
        /* Blocks may hold non-overlapping items too, so re-check each one. */
        if (sameString(bedChrom, chrom) && rangeIntersection(bedStart, bedEnd, start, end) > 0)
            fprintf(stdout, "%s\n", line);
        freeMem(parsedLine);
        sizeUsed += size;
        }
    }
/* Release everything acquired above: the block list, the index, and the
 * tab file (the last two were previously leaked or left open). */
slFreeList(&blockList);
crTreeFileClose(&crf);
lineFileClose(&lf);
}
void constExons(struct txGraph *graph, FILE *f) /* Write out constituitive exons. */ { /* Create a tree with all introns. */ struct rbTree *tree = rangeTreeNew(); struct txEdge *edge; for (edge = graph->edgeList; edge != NULL; edge = edge->next) { if (edge->type == ggIntron) { rangeTreeAdd(tree, graph->vertices[edge->startIx].position, graph->vertices[edge->endIx].position); } } /* Scan through all exons looking for ones that don't intersect * introns. */ int eId = 0; for (edge = graph->edgeList; edge != NULL; edge = edge->next) { if (edge->type == ggExon) { struct txVertex *s = &graph->vertices[edge->startIx]; struct txVertex *e = &graph->vertices[edge->endIx]; if (s->type == ggHardStart && e->type == ggHardEnd) { int start = s->position; int end = e->position; if (!rangeTreeOverlaps(tree, start, end)) { char *refSource = refSourceAcc(graph, edge); if (refSource != NULL && edge->evCount >= 10) { /* Do one more scan making sure that it doesn't * intersect any exons except for us. */ boolean anyOtherExon = FALSE; struct txEdge *ed; for (ed = graph->edgeList; ed != NULL; ed = ed->next) { if (ed != edge) { int edStart = graph->vertices[ed->startIx].position; int edEnd = graph->vertices[ed->endIx].position; if (rangeIntersection(edStart, edEnd, start, end) > 0) { anyOtherExon = TRUE; break; } } } if (!anyOtherExon) fprintf(f, "%s\t%d\t%d\t%s.%d\t0\t%s\n", graph->tName, start, end, refSource, ++eId, graph->strand); } } } } } rangeTreeFree(&tree); }
boolean pslOverlap(struct psl *a, struct psl *b)
/* Return TRUE if the two psls overlap.  When the global doTarget flag is set
 * the comparison is on the target side (same tName and overlapping
 * tStart-tEnd); otherwise it is on the query side (same qName and
 * overlapping qStart-qEnd). */
{
if (doTarget)
    return sameString(a->tName, b->tName)
        && rangeIntersection(a->tStart, a->tEnd, b->tStart, b->tEnd) > 0;
return sameString(a->qName, b->qName)
    && rangeIntersection(a->qStart, a->qEnd, b->qStart, b->qEnd) > 0;
}
void addInterSize(void *item)
/* Add the overlap between item (treated as a struct simpleRange) and the
 * file-level interRange to the running total interSize.  The raw
 * rangeIntersection() result is added as is, with no clamping. */
{
struct simpleRange *range = item;
interSize += rangeIntersection(range->start, range->end,
                               interRange.start, interRange.end);
}
boolean hitsRegions(char *chrom, int start, int end, struct region *regionList)
/* Return TRUE if chrom:start-end overlaps, by at least one base, any region
 * on regionList that is on the same chromosome; FALSE otherwise. */
{
struct region *region = regionList;
while (region != NULL)
    {
    if (sameString(chrom, region->chrom)
        && rangeIntersection(start, end, region->start, region->end) > 0)
        return TRUE;
    region = region->next;
    }
return FALSE;
}
static boolean breakUpIfOnDiagonal(struct block *blockList, boolean isRc, char *qName, char *tName, int qSize, int tSize, struct block *retBlockLists[], int maxBlockLists, int *retCount)
/* If any blocks are on diagonal, remove the blocks and separate the lists
 * of blocks before and after the diagonal.  Store block list pointers in
 * retBlockLists, the number of lists in retCount, and return TRUE if
 * we found any blocks on diagonal so we know to rescore afterwards.
 *
 * A block counts as "on diagonal" when qName equals tName and the block's
 * target range overlaps its query range (the query range is first passed
 * through reverseIntRange with qSize when isRc is set).  When qName and tName
 * differ nothing is split: retBlockLists[0] is blockList and *retCount is 1.
 * On-diagonal blocks are freed with freez.  Every resulting list is passed
 * through removeFrayedEnds.  Aborts if more than maxBlockLists lists would
 * be needed.  tSize is not used here. */
{
int blockListIndex = 0;
boolean brokenUp = FALSE;
retBlockLists[blockListIndex] = blockList;
if (sameString(qName, tName))
    {
    struct block *block = NULL, *lastBlock = NULL;
    int i = 0;
    for (block = blockList; block != NULL; block = block->next)
        {
        int qStart = block->qStart;
        int qEnd = block->qEnd;
        /* If this block heads the current output list and there was a
         * previous block, that previous block was on the diagonal (the list
         * head was moved past it below), so it can be freed now that its
         * next pointer has been followed. */
        if (lastBlock != NULL && block == retBlockLists[blockListIndex])
            freez(&lastBlock);
        if (isRc)
            reverseIntRange(&qStart, &qEnd, qSize);
        if (rangeIntersection(block->tStart, block->tEnd, qStart, qEnd) > 0)
            {
            brokenUp = TRUE;
            /* If blocks precede this one in the current list, terminate
             * that list at the previous block and open a new list slot. */
            if (block != retBlockLists[blockListIndex])
                {
                assert(lastBlock != NULL);
                lastBlock->next = NULL;
                blockListIndex++;
                if (blockListIndex >= maxBlockLists)
                    errAbort("breakUpIfOnDiagonal: Too many fragmented block lists!");
                }
            /* The current list (re)starts after the on-diagonal block. */
            retBlockLists[blockListIndex] = block->next;
            }
        lastBlock = block;
        }
    /* An empty final list means the last block was on the diagonal (or the
     * input was empty): drop the empty slot and free that last block. */
    if (retBlockLists[blockListIndex] == NULL)
        {
        blockListIndex--;
        if (lastBlock != NULL)
            freez(&lastBlock);
        }
    for (i=0; i <= blockListIndex; i++)
        {
        retBlockLists[i] = removeFrayedEnds(retBlockLists[i]);
        }
    }
*retCount = blockListIndex + 1;
return brokenUp;
}
void bbiAddToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end,
                     bits32 validCount, double minVal, double maxVal, double sumData,
                     double sumSquares, int reduction, struct bbiSummary **pOutList)
/* Add the data range start-end to the summary list.  The range is folded
 * into the summary at the head of *pOutList where it fits; otherwise new
 * summaries, each covering up to 'reduction' bases and clipped to chromSize,
 * are allocated and pushed on the head of the list.  An item spanning
 * several summaries contributes to each one in proportion to its overlap. */
{
struct bbiSummary *cur = *pOutList;

/* Avoid pathological clipping situation on bad input. */
if (end > chromSize)
    end = chromSize;

while (start < end)
    {
    /* Start a new summary when the current one can't hold 'start'. */
    if (cur == NULL || cur->chromId != chromId || cur->end <= start)
        {
        struct bbiSummary *fresh;
        AllocVar(fresh);
        fresh->chromId = chromId;
        /* Abut the previous summary if 'start' falls within one reduction
         * step of its end; otherwise begin right at 'start'. */
        if (cur != NULL && cur->chromId == chromId && cur->end + reduction > start)
            fresh->start = cur->end;
        else
            fresh->start = start;
        fresh->end = fresh->start + reduction;
        if (fresh->end > chromSize)
            fresh->end = chromSize;
        fresh->minVal = minVal;
        fresh->maxVal = maxVal;
        cur = fresh;
        slAddHead(pOutList, cur);
        }

    /* Amount of the item that lands in the current summary. */
    int overlap = rangeIntersection(start, end, cur->start, cur->end);
    if (overlap <= 0)
        {
        warn("%u %u doesn't intersect %u %u, chromId %u chromSize %u",
             start, end, cur->start, cur->end, chromId, chromSize);
        internalErr();
        }
    int itemSize = end - start;
    double overlapFactor = (double)overlap/itemSize;

    /* Fold the overlapping share into the summary. */
    cur->validCount += overlapFactor * validCount;
    if (cur->minVal > minVal)
        cur->minVal = minVal;
    if (cur->maxVal < maxVal)
        cur->maxVal = maxVal;
    cur->sumData += overlapFactor * sumData;
    cur->sumSquares += overlapFactor * sumSquares;

    /* Move past the part just handled. */
    start += overlap;
    }
}
int bkCountOverlappingRange(struct binKeeper *bk, int start, int end)
/* Return biggest overlap, in bases, of anything in binKeeper with the given
 * range, or 0 if nothing in bk overlaps start-end. */
{
struct binElement *el, *list = binKeeperFind(bk, start, end);
int bestOverlap = 0;
for (el = list; el != NULL; el = el->next)
    {
    int overlap = rangeIntersection(el->start, el->end, start, end);
    if (overlap > bestOverlap)
        bestOverlap = overlap;
    }
/* binKeeperFind returns a newly allocated list owned by the caller; free it
 * so that repeated queries do not leak. */
slFreeList(&list);
return bestOverlap;
}
struct edge *edgeFromConsensusOfEvidence(struct rbTree *vertexTree, struct evidence *evList, struct lm *lm) /* Attempt to create a single edge from a list of overlapping evidence ranges. * The start will be the consensus of all evidence starts. Likewise * the end will be the consensus of all evidence ends. The evidence that * overlaps this edge will be included in the edge. */ { /* Gather up lists of starts and ends. */ struct sourceAndPos *startList = NULL, *endList = NULL; struct evidence *ev, *nextEv; int listSize = 0; for (ev = evList; ev != NULL; ev = ev->next) { struct sourceAndPos *x; boolean trusted = trustedSource(ev->lb->sourceType); lmAllocVar(lm, x); x->position = ev->start; x->trustedSource = trusted; slAddHead(&startList, x); lmAllocVar(lm, x); x->position = ev->end; x->trustedSource = trusted; slAddHead(&endList, x); ++listSize; } /* Get consensus starts and ends. */ slSort(&startList, sourceAndPosCmp); struct vertex *start = consensusVertex(vertexTree, startList, listSize, ggSoftStart); slSort(&endList, sourceAndPosCmpRev); struct vertex *end = consensusVertex(vertexTree, endList, listSize, ggSoftEnd); /* Make edge */ struct edge *edge; AllocVar(edge); edge->start = start; edge->end = end; edge->next = NULL; /* Add overlapping evidence to edge. */ for (ev = evList; ev != NULL; ev = nextEv) { nextEv = ev->next; if (rangeIntersection(ev->start, ev->end, start->position, end->position) > 0) slAddHead(&edge->evList, ev); } return edge; }
static int isContained (MrfRead *currRead, char *targetName, int targetStart, int targetEnd)
/* Return 1 if any alignment block of currRead lies on targetName and
   overlaps targetStart-targetEnd by at least one base; return 0 otherwise. */
{
  int blockIdx;

  for (blockIdx = 0; blockIdx < arrayMax (currRead->blocks); blockIdx++) {
    MrfBlock *block = arrp (currRead->blocks,blockIdx,MrfBlock);

    if (!strEqual (block->targetName,targetName)) {
      continue;
    }
    if (rangeIntersection (block->targetStart,block->targetEnd,targetStart,targetEnd) > 0) {
      return 1;
    }
  }
  return 0;
}
void addBigWigIntervalInfo(struct bbiFile *bbi, struct lm *lm, char *chrom, int start, int end,
                           int *pSumSize, int *pSumCoverage, double *pSumVal)
/* Query the bigWig for intervals in chrom:start-end and add to the running
 * sums: *pSumSize grows by the size of the queried range, *pSumCoverage by
 * the number of queried bases each interval covers, and *pSumVal by each
 * interval's value times the bases it covers.  The interval list is
 * obtained through lm. */
{
struct bbiInterval *ivList = bigWigIntervalQuery(bbi, chrom, start, end, lm);
struct bbiInterval *iv;

*pSumSize += (end - start);
for (iv = ivList; iv != NULL; iv = iv->next)
    {
    int covered = rangeIntersection(iv->start, iv->end, start, end);
    if (covered <= 0)
        continue;
    *pSumCoverage += covered;
    *pSumVal += covered * iv->val;
    }
}
static struct psl *mapCDnaCDnaAln(struct hapRegions *hr, struct cDnaAlign *refAln, struct psl *mappedHap) /* create cdna to cdna alignments from mappedHap and refAln, return * NULL if can't be mapped */ { struct psl *cDnaCDnaAln = NULL; if (sameString(refAln->psl->tName, mappedHap->tName) && rangeIntersection(refAln->psl->tStart, refAln->psl->tEnd, mappedHap->tStart, mappedHap->tEnd)) { pslSwap(mappedHap, FALSE); cDnaCDnaAln = pslTransMap(pslTransMapNoOpts, refAln->psl, mappedHap); pslSwap(mappedHap, FALSE); if ((hr->hapRefCDnaFh != NULL) && (cDnaCDnaAln != NULL)) cDnaAlignPslOut(cDnaCDnaAln, refAln->alnId, hr->hapRefCDnaFh); } return cDnaCDnaAln; }
struct range *rangeTreeMaxOverlapping(struct rbTree *tree, int start, int end)
/* Return the item in tree that overlaps start-end the most, or NULL if no
 * item overlaps by at least one base.  On ties the first item found wins.
 * Not thread safe.  Trashes list used by rangeTreeAllOverlapping. */
{
struct range *winner = NULL;
struct range *cur = rangeTreeAllOverlapping(tree, start, end);
int winnerOverlap = 0;

while (cur != NULL)
    {
    int overlap = rangeIntersection(cur->start, cur->end, start, end);
    if (overlap > winnerOverlap)
        {
        winnerOverlap = overlap;
        winner = cur;
        }
    cur = cur->next;
    }
/* Detach the winner from the temporary list; its next pointer could have
 * been set by calls to list functions. */
if (winner != NULL)
    winner->next = NULL;
return winner;
}
struct protFeature *highestScoringFeature(struct protFeature *start, struct protFeature *end,
                                          int rangeStart, int rangeEnd)
/* Return the highest scoring feature between start and end (end itself is
 * not examined) that overlaps rangeStart-rangeEnd by at least one base.
 * Only features scoring above -1.0 can be chosen; on ties the first one
 * wins.  Return NULL when no feature qualifies. */
{
struct protFeature *winner = NULL;
struct protFeature *cur;
double topScore = -1.0;

for (cur = start; cur != end; cur = cur->next)
    {
    if (rangeIntersection(rangeStart, rangeEnd, cur->start, cur->end) <= 0)
        continue;
    if (cur->score > topScore)
        {
        winner = cur;
        topScore = cur->score;
        }
    }
return winner;
}
void writeAnswer(struct clone *cloneList, char *fileName)
/* Write out the answer to fileName, assuming cloneList is sorted.  Each clone
 * is written with dumpClone.  A newline is written after the last clone and
 * after any clone whose successor does not overlap the span from that
 * clone's start to the largest end seen so far. */
{
FILE *f = mustOpen(fileName, "w");
struct clone *clone = cloneList;
int maxEnd = 0;

while (clone != NULL)
    {
    /* Remember the successor before the clone is dumped. */
    struct clone *following = clone->next;
    dumpClone(clone, f);
    if (clone->end > maxEnd)
        maxEnd = clone->end;
    if (following == NULL
        || rangeIntersection(clone->start, maxEnd, following->start, following->end) <= 0)
        {
        fprintf(f, "\n");
        }
    clone = following;
    }
carefulClose(&f);
}
void removeOutside(int start, int end, struct segment *seg)
/* Remove parts of seg outside of range start-end.  Genes that do not overlap
 * the range by at least one base are dropped from seg->geneList; surviving
 * genes have their features trimmed with removeFeaturesOutside and their
 * bounds recalculated.  seg->suboptList is trimmed the same way.  Gene order
 * is preserved. */
{
struct genScanGene *gene, *nextGene, *geneList = NULL;
for (gene = seg->geneList; gene != NULL; gene = nextGene)
    {
    nextGene = gene->next;
    /* rangeIntersection is negative for disjoint ranges, so compare with 0;
     * testing it as a boolean kept genes entirely outside the window. */
    if (rangeIntersection(start, end, gene->start, gene->end) > 0)
        {
        slAddHead(&geneList, gene);
        gene->featureList = removeFeaturesOutside(start, end, gene->featureList);
        calcGeneBounds(gene);
        }
    }
slReverse(&geneList);
seg->geneList = geneList;
seg->suboptList = removeFeaturesOutside(start, end, seg->suboptList);
}
static bits32 bbiSummarySlice(struct bbiFile *bbi, bits32 baseStart, bits32 baseEnd,
                              struct bbiSummary *sumList, struct bbiSummaryElement *el)
/* Fill in el with statistics for baseStart-baseEnd taken from the summaries
 * on sumList, each weighted by the fraction of the summary that falls inside
 * the interval.  el is only written if the resulting valid count is above
 * zero.  Return the number of valid data bases in the interval. */
{
bits32 validCount = 0;
double sumData = 0, sumSquares = 0;
double minVal, maxVal;
struct bbiSummary *sum;

if (sumList == NULL)
    return validCount;

/* Extremes start from the first summary, even if it ends up not overlapping. */
minVal = sumList->minVal;
maxVal = sumList->maxVal;

for (sum = sumList; sum != NULL && sum->start < baseEnd; sum = sum->next)
    {
    int overlap = rangeIntersection(baseStart, baseEnd, sum->start, sum->end);
    if (overlap <= 0)
        continue;
    double overlapFactor = (double)overlap / (sum->end - sum->start);
    validCount += sum->validCount * overlapFactor;
    sumData += sum->sumData * overlapFactor;
    sumSquares += sum->sumSquares * overlapFactor;
    if (maxVal < sum->maxVal)
        maxVal = sum->maxVal;
    if (minVal > sum->minVal)
        minVal = sum->minVal;
    }

if (validCount > 0)
    {
    el->validCount = validCount;
    el->minVal = minVal;
    el->maxVal = maxVal;
    el->sumData = sumData;
    el->sumSquares = sumSquares;
    }
return validCount;
}
static bits32 bbiIntervalSlice(struct bbiFile *bbi, bits32 baseStart, bits32 baseEnd,
                               struct bbiInterval *intervalList, struct bbiSummaryElement *el)
/* Fill in el with statistics for baseStart-baseEnd taken from the intervals
 * on intervalList, each weighted by the number of its bases inside the
 * interval.  el is written whenever intervalList is non-empty, even if
 * nothing overlaps.  Return the rounded number of valid data bases in the
 * interval. */
{
double validCount = 0;
double sumData = 0, sumSquares = 0;
double minVal, maxVal;
struct bbiInterval *interval;

if (intervalList == NULL)
    return round(validCount);

/* Extremes start from the first interval's value. */
minVal = intervalList->val;
maxVal = intervalList->val;

for (interval = intervalList; interval != NULL && interval->start < baseEnd;
     interval = interval->next)
    {
    int overlap = rangeIntersection(baseStart, baseEnd, interval->start, interval->end);
    if (overlap <= 0)
        continue;
    int intervalSize = interval->end - interval->start;
    double overlapFactor = (double)overlap / intervalSize;
    double intervalWeight = intervalSize * overlapFactor;
    validCount += intervalWeight;
    sumData += interval->val * intervalWeight;
    sumSquares += interval->val * interval->val * intervalWeight;
    if (maxVal < interval->val)
        maxVal = interval->val;
    if (minVal > interval->val)
        minVal = interval->val;
    }

el->validCount = round(validCount);
el->minVal = minVal;
el->maxVal = maxVal;
el->sumData = sumData;
el->sumSquares = sumSquares;
return round(validCount);
}
void check(struct sqlConnection *conn, char *table)
/* Check that table is as planned: walk its chromStart,chromEnd rows in query
 * order and report to stderr any row that starts before the previous row, and
 * any row whose range, widened by one base at the start, overlaps the previous
 * row.  Always finishes by calling errAbort. */
{
char query[256];
char **row;
struct sqlResult *sr;
int prevStart = -1, prevEnd = -1;

sqlSafef(query, sizeof query, "select chromStart,chromEnd from %s", table);
sr = sqlGetResult(conn, query);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    int start = atoi(row[0]);
    int end = atoi(row[1]);
    if (start < prevStart)
        fprintf(stderr,"Out of order: %d,%d\n", prevStart, start);
    if (rangeIntersection(prevStart, prevEnd, start-1, end) > 0)
        fprintf(stderr,"Overlapping: (%d %d) (%d %d)\n", prevStart, prevEnd, start, end);
    prevStart = start;
    prevEnd = end;
    }
sqlFreeResult(&sr);
errAbort("All for now");
}
boolean agxIsSubset(struct altGraphX *query, struct altGraphX *target) /** Return TRUE if query is just a subset of target, FALSE otherwise. */ { int *qPos = query->vPositions, *tPos = target->vPositions; int *qStarts = query->edgeStarts, *tStarts = target->edgeStarts; int *qEnds = query->edgeEnds, *tEnds = target->edgeEnds; int qECount = query->edgeCount, tECount = target->edgeCount; int qIx = 0, tIx = 0; if(query->tStart < target->tStart || query->tEnd > target->tEnd || query->strand[0] != target->strand[0]) return FALSE; /* Look to see if every query edge is subsumed by a target edge. */ for(qIx = 0; qIx < qECount; qIx++) { boolean edgeFound = FALSE; /* only looking at exons. */ if(altGraphXEdgeType(query, qIx) != ggExon) continue; /* Look at each target exon to try and find one that subsumes the query exon. */ for(tIx = 0; tIx < tECount; tIx++) { if(altGraphXEdgeType(target, tIx) != ggExon) continue; if(rangeIntersection(qPos[qStarts[qIx]], qPos[qEnds[qIx]], tPos[tStarts[tIx]], tPos[tEnds[tIx]]) > 0) { edgeFound |= TRUE; /* Found one, update edge found. */ break; /* No need to keep looking for this query exon. */ } } if (!edgeFound) return FALSE; } return TRUE; }
struct sizeList *unionSizeLists(struct sizeList *a, struct sizeList *b, FILE *err) { struct sizeList *s, *t, *u, *c=sizeListClone(a), *d=sizeListClone(b); boolean didChange=TRUE; int mergeGaps=20000; if (a == NULL) return b; while (didChange) { didChange=FALSE; for (s = c; s != NULL; s = s->next) for (t = d; t != NULL; t = t->next) { if (t->chrom == NULL || t->name == NULL) continue; // printf("%s/%s.%d-%d\t%s/%s.%d-%d\t", s->name, s->chrom, s->chromStart, s->chromEnd, t->name, t->chrom, t->chromStart, t->chromEnd); if ( !strncmp(s->name, t->name, 6) && sameString(s->chrom, t->chrom) ) if (rangeIntersection(s->chromStart,s->chromEnd,t->chromStart,t->chromEnd)+mergeGaps>0) { s->chromStart = min(s->chromStart,t->chromStart); s->chromEnd = max(s->chromEnd, t->chromEnd); t->chrom = t->name = NULL; // it would be better to remove the element here didChange = TRUE; continue; } } } for (t = d; t != NULL; t = t->next) if (t->name != NULL && t->chrom!=NULL) { u = sizeListNew(t->chrom, t->chromStart, t->chromEnd, t->name); slAddTail(c, u); fprintf(err, "%s\t%d\t%d\t%s\n", t->chrom, t->chromStart, t->chromEnd, t->name); } return c; }
boolean isCoding(struct bed *bed)
/* Return TRUE if bed has a coding region, that is if thickStart-thickEnd
 * overlaps chromStart-chromEnd by at least one base. */
{
int codingBases = rangeIntersection(bed->chromStart, bed->chromEnd,
                                    bed->thickStart, bed->thickEnd);
return codingBases > 0;
}
static void rBoxJoin(struct boxRef *refList, int qStart, int qEnd, int tStart, int tEnd) /* Recursively cluster boxes. */ { int boxCount = slCount(refList); if (boxCount <= 1) { /* Easy: no merging required. */ } else if (boxCount == 2) { /* Decide if pair overlaps and if so merge. */ struct box *a = refList->box; struct box *b = refList->next->box; if (rangeIntersection(a->in->qStart, a->in->qEnd, b->in->qStart, b->in->qEnd) > 0 && rangeIntersection(a->in->tStart, a->in->tEnd, b->in->tStart, b->in->tEnd) > 0 ) { mergeClusters(a->cluster, b->cluster); } else { /* Two non-overlapping boxes, we don't have to do anything. */ } } else if (allStartBy(refList, qStart, tStart)) { /* If everybody contains the upper left corner, then they all can * be merged. This is the route taken often by clumps with lots * of overlap. */ struct cluster *aCluster = refList->box->cluster; struct boxRef *ref; for (ref = refList->next; ref != NULL; ref = ref->next) { struct cluster *bCluster = ref->box->cluster; mergeClusters(aCluster, bCluster); } } else if (allSameCluster(refList)) { /* Everything is in the same cluster, no action required. */ } else { /* We can't yet figure out clumping, so break * up our window in two along larger dimension and * recurse on both subwindows. */ struct boxRef *list1 = NULL, *list2 = NULL, *ref, *next; if (qEnd - qStart > tEnd - tStart) { int mid = (qStart + qEnd)>>1; for (ref = refList; ref != NULL; ref = next) { struct box *box = ref->box; next = ref->next; if (box->in->qEnd <= mid) { slAddHead(&list1, ref); } else if (box->in->qStart >= mid) { slAddHead(&list2, ref); } else { /* Box crosses boundary, have to put it on both lists. */ slAddHead(&list1, ref); lmAllocVar(lm, ref); ref->box = box; slAddHead(&list2, ref); } } rBoxJoin(list1, qStart, mid, tStart, tEnd); rBoxJoin(list2, mid, qEnd, tStart, tEnd); } else {
void ggcChrom(struct chromGenes *chrom, char *axtFile, struct ggcInfo *g, struct hash *restrictHash, FILE *fParts)
/* Tabulate matches on chromosome.
 * Reads every alignment in axtFile into two per-base arrays covering the
 * chromosome, then for each gene on chrom->geneList that passes the filters
 * tallies those arrays into the counters of g for UTRs, CDS, regions around
 * transcription/translation start and end, upstream/downstream regions,
 * coding exons by position, introns and splice sites.
 * restrictHash, if not NULL, limits processing to genes whose name is in it.
 * fParts, if not NULL, receives one tab-separated line per gene used (plus a
 * header line the first time this function writes to any fParts).
 * Also bumps the file-level counters totalGenes, reviewedGenes and genesUsed.
 * Aborts on alignments past the chromosome end or on the minus target strand. */
{
struct lineFile *lf = lineFileOpen(axtFile, TRUE);
bool *hits, *covers;
/* hitCount is incremented below but not otherwise read in this function;
 * coverCount is never touched after initialization. */
int hitCount = 0, coverCount = 0;
struct axt *axt;
struct genePred *gp;
int closeSize = g->closeSize;
int closeHalf = closeSize/2;

/* Build up array of booleans - one per base - which are
 * 1's where mouse/human align and bases match, zero
 * elsewhere.  covers gets 1 where the target base is aligned to a
 * gap ('-') in the query and 2 where it is aligned to a query base. */
AllocArray(hits, chrom->size);
AllocArray(covers, chrom->size);
printf("%s (%d bases)\n", chrom->name, chrom->size);
while ((axt = axtRead(lf)) != NULL)
    {
    int tPos = axt->tStart;
    int symCount = axt->symCount, i;
    char t, q, *tSym = axt->tSym, *qSym = axt->qSym;
    if (axt->tEnd > chrom->size)
        errAbort("tEnd %d, chrom size %d in %s", axt->tEnd, chrom->size, axtFile);
    if (axt->tStrand == '-')
        errAbort("Can't handle minus strand on target in %s", axtFile);
    for (i=0; i<symCount; ++i)
        {
        t = tSym[i];
        /* Only columns with a target base advance the target position. */
        if (t != '-')
            {
            q = qSym[i];
            if (toupper(t) == toupper(q))
                {
                hits[tPos] = TRUE;
                ++hitCount;
                }
            if (q == '-')
                covers[tPos] = 1;
            else
                covers[tPos] = 2;
            ++tPos;
            }
        }
    axtFree(&axt);
    }

for (gp = chrom->geneList; gp != NULL; gp = gp->next)
    {
    int exonIx;
    int utr3Size = 0, utr5Size = 0, cdsAllSize = 0;
    int utr3Pos = 0, utr5Pos = 0, cdsAllPos = 0;
    bool *utr3Hits = NULL, *utr3Covers = NULL;
    bool *utr5Hits = NULL, *utr5Covers = NULL;
    bool *cdsAllHits = NULL, *cdsAllCovers = NULL;
    bool isRev = (gp->strand[0] == '-');

    /* Filter out genes not in restrict hash if any. */
    ++totalGenes;
    if (restrictHash != NULL)
        if (!hashLookup(restrictHash, gp->name))
            continue;
    ++reviewedGenes;

    /* Filter out genes without meaningful UTRs: both the gap between
     * txStart and cdsStart and the gap between cdsEnd and txEnd must be at
     * least half of closeSize. */
    if (gp->cdsStart - gp->txStart < g->closeSize/2 || gp->txEnd - gp->cdsEnd < g->closeSize/2)
        continue;
    ++genesUsed;

    /* Total up UTR and CDS sizes.  The part of an exon before cdsStart is
     * 5' UTR on the plus strand and 3' UTR on the minus strand; the part
     * after cdsEnd is the opposite.  (eSize and oneUtr are not used.) */
    for (exonIx=0; exonIx<gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        int eSize = eEnd - eStart;
        int oneUtr, oneCds;
        oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd);
        if (oneCds > 0)
            {
            cdsAllSize += oneCds;
            }
        if (eStart < gp->cdsStart)
            {
            int utrStart = eStart;
            int utrEnd = min(gp->cdsStart, eEnd);
            int utrSize = utrEnd - utrStart;
            if (isRev)
                utr3Size += utrSize;
            else
                utr5Size += utrSize;
            }
        if (eEnd > gp->cdsEnd)
            {
            int utrStart = max(gp->cdsEnd, eStart);
            int utrEnd = eEnd;
            int utrSize = utrEnd - utrStart;
            if (isRev)
                utr5Size += utrSize;
            else
                utr3Size += utrSize;
            }
        }

    /* Condense hits from UTRs and CDSs: allocate one contiguous buffer per
     * category, sized by the totals just computed. */
    if (utr5Size > 0)
        {
        AllocArray(utr5Hits, utr5Size);
        AllocArray(utr5Covers, utr5Size);
        }
    if (utr3Size > 0)
        {
        AllocArray(utr3Hits, utr3Size);
        AllocArray(utr3Covers, utr3Size);
        }
    if (cdsAllSize > 0)
        {
        AllocArray(cdsAllHits, cdsAllSize);
        AllocArray(cdsAllCovers, cdsAllSize);
        }

    /* Second pass over the exons, mirroring the sizing pass above: copy the
     * per-base hits/covers of each piece into its buffer.  The *Pos counters
     * must end up equal to the *Size totals (asserted below). */
    for (exonIx=0; exonIx<gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        int eSize = eEnd - eStart;
        int oneUtr, oneCds;
        oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd);
        if (oneCds > 0)
            {
            /* Coding part of the exon starts at the later of eStart and
             * gp->cdsStart and is oneCds bases long.  (The local cdsEnd is
             * not used.) */
            int cdsStart = eStart;
            int cdsEnd = gp->cdsEnd;
            if (cdsStart < gp->cdsStart)
                cdsStart = gp->cdsStart;
            memcpy(cdsAllHits + cdsAllPos, hits + cdsStart, oneCds * sizeof(*hits));
            memcpy(cdsAllCovers + cdsAllPos, covers + cdsStart, oneCds * sizeof(*covers));
            cdsAllPos += oneCds;
            }
        if (eStart < gp->cdsStart)
            {
            int utrStart = eStart;
            int utrEnd = min(gp->cdsStart, eEnd);
            int utrSize = utrEnd - utrStart;
            if (isRev)
                {
                memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr3Pos += utrSize;
                }
            else
                {
                memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr5Pos += utrSize;
                }
            }
        if (eEnd > gp->cdsEnd)
            {
            int utrStart = max(gp->cdsEnd, eStart);
            int utrEnd = eEnd;
            int utrSize = utrEnd - utrStart;
            if (isRev)
                {
                memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr5Pos += utrSize;
                }
            else
                {
                memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits));
                memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers));
                utr3Pos += utrSize;
                }
            }
        }
    assert(utr3Pos == utr3Size);
    assert(utr5Pos == utr5Size);
    assert(cdsAllPos == cdsAllSize);
    tallyHits(&g->utr5, utr5Hits, utr5Covers, utr5Size, isRev);
    tallyHits(&g->utr3, utr3Hits, utr3Covers, utr3Size, isRev);
    tallyHits(&g->cdsAll, cdsAllHits, cdsAllCovers, cdsAllSize, isRev);

    /* Optionally write out file with gene by gene info. */
    if (fParts != NULL)
        {
        /* Write header line first time through.  The flag is static, so the
         * header is written once per process, not once per call. */
        static boolean firstTime = TRUE;
        if (firstTime)
            {
            firstTime = FALSE;
            fprintf(fParts, "#accession\tsize_5\tali_5\tmatch_5\tsize_c\tali_c\tmatch_c\tsize_3\tali_3\tmatch_3\n");
            }
        fprintf(fParts, "%s\t", gp->name);
        fprintf(fParts, "%d\t%d\t%d\t", utr5Size, countBools(utr5Covers, utr5Size), countBools(utr5Hits, utr5Size));
        fprintf(fParts, "%d\t%d\t%d\t", cdsAllSize, countBools(cdsAllCovers, cdsAllSize), countBools(cdsAllHits, cdsAllSize));
        fprintf(fParts, "%d\t%d\t%d\n", utr3Size, countBools(utr3Covers, utr3Size), countBools(utr3Hits, utr3Size));
        }

    /* Tally upstream/downstream hits.  s1-e1 is a closeSize window centered
     * on txStart and s2-e2 one centered on txEnd; on the minus strand the
     * roles of the two ends (and of up/down) are swapped. */
        {
        int s1 = gp->txStart - closeHalf;
        int e1 = s1 + closeSize;
        int s2 = gp->txEnd - closeHalf;
        int e2 = s2 + closeSize;
        if (isRev)
            {
            tallyInRange(&g->down, hits, covers, chrom->size, gp->txStart - g->baseDown, gp->txStart, isRev);
            tallyInRange(&g->up, hits, covers, chrom->size, gp->txEnd, gp->txEnd + g->baseUp, isRev);
            tallyInRange(&g->txEnd, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->txStart, hits, covers, chrom->size, s2, e2, isRev);
            }
        else
            {
            tallyInRange(&g->up, hits, covers, chrom->size, gp->txStart - g->baseUp, gp->txStart, isRev);
            tallyInRange(&g->down, hits, covers, chrom->size, gp->txEnd, gp->txEnd + g->baseDown, isRev);
            tallyInRange(&g->txStart, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->txEnd, hits, covers, chrom->size, s2, e2, isRev);
            }
        }

    /* Tally hits in coding exons */
    for (exonIx=0; exonIx < gp->exonCount; ++exonIx)
        {
        int eStart = gp->exonStarts[exonIx];
        int eEnd = gp->exonEnds[exonIx];
        /* Single coding exon. */
        if (eStart <= gp->cdsStart && eEnd >= gp->cdsEnd)
            {
            eStart = gp->cdsStart;
            eEnd = gp->cdsEnd;
            tallyInRange(&g->cdsSingle, hits, covers, chrom->size, eStart, eEnd, isRev);
            }
        /* Initial coding exon (in chromosome order; it is the last coding
         * exon of the transcript on the minus strand).  Also tallies a
         * closeSize window centered on cdsStart. */
        else if (eStart < gp->cdsStart && eEnd > gp->cdsStart)
            {
            int cs = gp->cdsStart - closeHalf;
            int ce = cs + closeSize;
            eStart = gp->cdsStart;
            if (isRev)
                {
                tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsLast, hits, covers, chrom->size, eStart, eEnd, isRev);
                }
            else
                {
                tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsFirst, hits, covers, chrom->size, eStart, eEnd, isRev);
                }
            }
        /* Final coding exon (in chromosome order).  Also tallies a closeSize
         * window centered on cdsEnd. */
        else if (eStart < gp->cdsEnd && eEnd > gp->cdsEnd)
            {
            int cs = gp->cdsEnd - closeHalf;
            int ce = cs + closeSize;
            eEnd = gp->cdsEnd;
            if (isRev)
                {
                tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsFirst, hits, covers, chrom->size, eStart, eEnd, isRev);
                }
            else
                {
                tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev);
                tallyInRange(&g->cdsLast, hits, covers, chrom->size, eStart, eEnd, isRev);
                }
            }
        /* Middle (but not only) coding exon */
        else if (eStart >= gp->cdsStart && eEnd <= gp->cdsEnd)
            {
            tallyInRange(&g->cdsMiddle, hits, covers, chrom->size, eStart, eEnd, isRev);
            }
        /* Anything else (an exon entirely outside the CDS) is not tallied here. */
        else
            {
            }
        }

    /* Tally hits in introns and splice sites.  Each intron runs from the
     * end of one exon to the start of the next; s1-e1 and s2-e2 are
     * closeSize windows centered on its two boundaries. */
    for (exonIx=1; exonIx<gp->exonCount; ++exonIx)
        {
        int iStart = gp->exonEnds[exonIx-1];
        int iEnd = gp->exonStarts[exonIx];
        int s1 = iStart - closeHalf;
        int e1 = s1 + closeSize;
        int s2 = iEnd - closeHalf;
        int e2 = s2 + closeSize;
        if (isRev)
            {
            tallyInRange(&g->splice3, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->splice5, hits, covers, chrom->size, s2, e2, isRev);
            }
        else
            {
            tallyInRange(&g->splice5, hits, covers, chrom->size, s1, e1, isRev);
            tallyInRange(&g->splice3, hits, covers, chrom->size, s2, e2, isRev);
            }
        tallyInRange(&g->intron, hits, covers, chrom->size, iStart, iEnd, isRev);
        }

    /* Release the per-gene condensed buffers. */
    freez(&utr5Hits);
    freez(&utr3Hits);
    freez(&cdsAllHits);
    freez(&utr5Covers);
    freez(&utr3Covers);
    freez(&cdsAllCovers);
    }
freez(&hits);
freez(&covers);
lineFileClose(&lf);
}