static void snapSoftToCloseHard(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxSnapSize, int maxUncheckedSnapSize, struct nibTwoCache *seqCache, char *chromName) /* Snap hard vertices to nearby soft vertices of same type. */ { struct lm *lm = lmInit(0); addWaysInAndOut(vertexTree, edgeTree, lm); struct dlList *vList = sortedListFromTree(vertexTree); struct dlNode *node; int snapCount = 0; for (node = vList->head; !dlEnd(node); node = node->next) { if (snapVertex(node, maxSnapSize, maxUncheckedSnapSize, seqCache, chromName)) { rbTreeRemove(vertexTree, node->val); ++snapCount; } } /* Clean up ways in and out since have removed some nodes. */ for (node = vList->head; !dlEnd(node); node = node->next) { struct vertex *v = node->val; v->waysIn = v->waysOut = NULL; } if (snapCount > 0) { verbose(3, "Snapped %d close edges, now have %d vertices\n", snapCount, vertexTree->n); updateForwardedEdges(edgeTree); } dlListFree(&vList); lmCleanup(&lm); }
static void updateForwardedEdges(struct rbTree *edgeTree)
/* Re-point every edge whose start or end vertex carries a movedTo forwarding
 * pointer.  Such an edge is taken out of edgeTree, its endpoint(s) replaced by the
 * movedTo target (one step only), and then handed to mergeOrAddEdge(). */
{
struct slRef *refList = rbTreeItems(edgeTree);
struct slRef *ref;
int forwardCount = 0;

for (ref = refList; ref != NULL; ref = ref->next)
    {
    struct edge *edge = ref->val;
    struct vertex *newStart = edge->start->movedTo;
    struct vertex *newEnd = edge->end->movedTo;

    /* Neither endpoint moved: edge stays where it is. */
    if (newStart == NULL && newEnd == NULL)
	continue;

    forwardCount += 1;
    /* Remove before changing the endpoints, re-insert after. */
    rbTreeRemove(edgeTree, edge);
    if (newStart != NULL)
	edge->start = newStart;
    if (newEnd != NULL)
	edge->end = newEnd;
    mergeOrAddEdge(edgeTree, edge);
    }

if (forwardCount > 0)
    verbose(3, "Forwarded %d edges.\n", forwardCount);
slFreeList(&refList);
}
static void halfConsensusBackward(struct vertex *v, struct rbTree *vertexTree, struct rbTree *edgeTree, enum ggVertexType softType, struct lm *lm) /* Figure out consensus start of all edges end at v that have soft start. */ { /* Collect a list of all attached softies. */ struct sourceAndPos *list = NULL, *el; struct slRef *edgeRef; int softCount = 0; for (edgeRef = v->waysIn; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *v = edge->start; if (v->type == softType) { struct evidence *ev; for (ev = edge->evList; ev != NULL; ev = ev->next) { lmAllocVar(lm, el); el->position = ev->start; el->trustedSource = trustedSource(ev->lb->sourceType); slAddHead(&list, el); ++softCount; } } } /* See if have enough elements to make consensus forming * worthwhile. */ if (softCount > 1) { slSort(&list, sourceAndPosCmp); struct vertex *start = consensusVertex(vertexTree, list, softCount, softType); for (edgeRef = v->waysIn; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *v = edge->start; if (v != start && v->type == softType) { rbTreeRemove(edgeTree, edge); verbose(3, "Performing half-hard consensus: moving edge start from %d to %d\n", edge->start->position, start->position); edge->start = start; mergeOrAddEdge(edgeTree, edge); // Will always merge. } } } }
static struct range *rangeTreeAddValHead(struct rbTree *tree, int start, int end, struct slName **newVal) { struct range *r, *existing; struct slName *head; AllocVar(r); r->start = start; r->end = end; r->val = *newVal; while ((existing = rbTreeRemove(tree, r))) { r->start = min(r->start, existing->start); r->end = max(r->end, existing->end); head = (struct slName *)(existing->val); slAddHead(&head, *newVal); r->val = head; } rbTreeAdd(tree, r); return r; }
static void removeEmptyEdges(struct rbTree *vertexTree, struct rbTree *edgeTree)
/* Drop from edgeTree every edge whose end position is not strictly greater than
 * its start position.  If any edge was dropped, also prune vertices that are
 * no longer used. */
{
struct slRef *edgeRefList = rbTreeItems(edgeTree);
struct slRef *edgeRef = edgeRefList;
int removeCount = 0;

while (edgeRef != NULL)
    {
    struct edge *edge = edgeRef->val;
    if (edge->end->position <= edge->start->position)
	{
	rbTreeRemove(edgeTree, edge);
	++removeCount;
	}
    edgeRef = edgeRef->next;
    }

if (removeCount != 0)
    removeUnusedVertices(vertexTree, edgeTree);
slFreeList(&edgeRefList);
}
int snapHalfHardBackward(struct vertex *v, struct rbTree *edgeTree, enum ggVertexType softType, enum ggVertexType hardType) /* V is a hard vertex. Try to snap soft start vertices connected to v * to nearest hard vertex connected to v. */ { int snapCount = 0; // enum ggVertex otherHardType = (hardType == ggHardStart ? ggHardEnd : ggHardStart); slSort(&v->waysIn, edgeRefCmpStartRev); struct slRef *hardRef = v->waysIn, *softRef = v->waysIn; for (;;) { /* Advance softRef to next soft ended edge. */ for (;softRef != NULL; softRef = softRef->next) { struct edge *edge = softRef->val; if (edge->start->type == softType) break; } if (softRef == NULL) break; struct edge *softEdge = softRef->val; /* If hardRef is before softRef (or it's not hard) advance it */ for (;hardRef != NULL; hardRef = hardRef->next) { struct edge *edge = hardRef->val; if (edge->start->type == hardType && edge->start->position <= softEdge->start->position) break; } if (hardRef == NULL) break; rbTreeRemove(edgeTree, softEdge); struct edge *hardEdge = hardRef->val; verbose(3, "Snapping half-hard edge starting at %d to %d\n", softEdge->start->position, hardEdge->start->position); softEdge->start = hardEdge->start; ++snapCount; mergeOrAddEdge(edgeTree, softEdge); softRef = softRef->next; } if (snapCount > 0) verbose(3, "Snapped %d reverse\n", snapCount); return snapCount; }
struct range *rangeTreeAddVal(struct rbTree *tree, int start, int end, void *val, void *(*mergeVals)(void *existingVal, void *newVal) ) /* Add range to tree, merging with existing ranges if need be. * If this is a new range, set the value to this val. * If there are existing items for this range, and if mergeVals function is not null, * apply mergeVals to the existing values and this new val, storing the result as the val * for this range (see rangeTreeAddValCount() and rangeTreeAddValList() below for examples). */ { struct range *r, *existing; r = lmAlloc(tree->lm, sizeof(*r)); /* alloc new zeroed range */ r->start = start; r->end = end; r->val = val; while ((existing = rbTreeRemove(tree, r)) != NULL) { r->start = min(r->start, existing->start); r->end = max(r->end, existing->end); if (mergeVals) r->val = mergeVals(existing->val, r->val); } rbTreeAdd(tree, r); return r; }
struct fill *fillSpace(struct chrom *chrom, struct space *space,
	struct chain *chain, struct cBlock *startBlock, boolean isQ)
/* Create a fill for chain inside space.  The fill bounds come from innerBounds();
 * if that fails NULL is returned and nothing is changed.  Otherwise space is
 * removed from chrom->spaces, new spaces of at least minSpace are added for the
 * leftover on either side, and the fill is put at the head of the gap's fillList. */
{
int s, e;
if (!innerBounds(startBlock, isQ, space->start, space->end, &s, &e))
    return NULL;
assert(s < e);

struct fill *fill;
AllocVar(fill);
fill->chain = chain;
fill->start = s;
fill->end = e;

/* The old space is replaced by up to two smaller ones. */
rbTreeRemove(chrom->spaces, space);

int leftSize = s - space->start;
if (leftSize >= minSpace)
    {
    struct space *lSpace;
    AllocVar(lSpace);
    lSpace->start = space->start;
    lSpace->end = s;
    lSpace->gap = space->gap;
    rbTreeAdd(chrom->spaces, lSpace);
    }

int rightSize = space->end - e;
if (rightSize >= minSpace)
    {
    struct space *rSpace;
    AllocVar(rSpace);
    rSpace->start = e;
    rSpace->end = space->end;
    rSpace->gap = space->gap;
    rbTreeAdd(chrom->spaces, rSpace);
    }

slAddHead(&space->gap->fillList, fill);
return fill;
}
static void removeUnusedVertices(struct rbTree *vertexTree, struct rbTree *edgeTree) /* Remove vertices not connected to any edges. */ { /* Get vertex list and clear counts. */ struct slRef *vRef, *vRefList = rbTreeItems(vertexTree); for (vRef = vRefList; vRef != NULL; vRef = vRef->next) { struct vertex *v = vRef->val; v->count = 0; } /* Inc counts of vertices connected to edges. */ rbTreeTraverse(edgeTree, incVertexUses); /* Remove unused vertices. */ for (vRef = vRefList; vRef != NULL; vRef = vRef->next) { struct vertex *v = vRef->val; if (v->count == 0) rbTreeRemove(vertexTree, v); } slFreeList(&vRefList); }
static void removeEnclosedDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxBleedOver, double singleExonMaxOverlap) /* Move double-softs that overlap spliced things to a very great extent into * the spliced things. Also remove tiny double-softs (no more than 2*maxBleedOver). */ { /* Traverse graph and build up range tree covering spliced exons. For each * range of overlapping exons, assemble a singly-linked list of all exons in * the range */ struct rbTree *rangeTree = rangeTreeNew(0); struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree); int removedCount = 0; for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start = edge->start; struct vertex *end = edge->end; if (start->type == ggHardStart || end->type == ggHardEnd) { rangeTreeAddValList(rangeTree, start->position, end->position, edge); } } /* Traverse graph yet one more time looking for doubly-soft exons * that are overlapping the spliced exons. */ for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start = edge->start; struct vertex *end = edge->end; if (start->type == ggSoftStart && end->type == ggSoftEnd) { int s = start->position; int e = end->position; int size = e - s; if (size <= maxBleedOver+maxBleedOver) { /* Tiny case, just remove edge and forget it. */ verbose(3, "Removing tiny double-soft edge from %d to %d\n", s, e); rbTreeRemove(edgeTree, edge); ++removedCount; } else { /* Normal case, look for exon list that encloses us, and * if any single exon in that list encloses us, merge into it. */ int splicedOverlap = rangeTreeOverlapSize(rangeTree, s, e); if (splicedOverlap > 0 && splicedOverlap > singleExonMaxOverlap*size) { if (!trustedEdge(edge)) { /* Once we find a range that overlaps the doubly-soft edge, find * (half-hard or better) edge from that range that encloses the * doubly soft edge. 
*/ struct range *r = rangeTreeMaxOverlapping(rangeTree, s, e); struct edge *nextEdge, *edgeList = r->val; struct edge *enclosingEdge = NULL; for (nextEdge = edgeList; edgeList != NULL; edgeList = edgeList->next) { if (encloses(nextEdge, edge)) { enclosingEdge = nextEdge; } } if (enclosingEdge != NULL) { enclosingEdge->evList = slCat(enclosingEdge->evList, edge->evList); edge->evList = NULL; verbose(3, "Removing doubly-soft edge %d-%d, reassigning to %d-%d\n", s, e, enclosingEdge->start->position, enclosingEdge->end->position); rbTreeRemove(edgeTree, edge); ++removedCount; } } } } } } /* Clean up and go home. */ if (removedCount > 0) removeUnusedVertices(vertexTree, edgeTree); for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *nextEdge, *edge = edgeRef->val; while (edge != NULL) { nextEdge = edge->next; edge->next = NULL; edge = nextEdge; } } slFreeList(&edgeRefList); rbTreeFree(&rangeTree); }
static void mergeDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree) /* Merge together overlapping edges with soft ends. */ { struct mergedEdge /* Hold together info on a merged edge. */ { struct evidence *evidence; }; /* Traverse graph and build up range tree. Each node in the range tree * will represent the bounds of coordinates of overlapping double softs */ struct rbTree *rangeTree = rangeTreeNew(0); struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree); for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start = edge->start; struct vertex *end = edge->end; if (start->type == ggSoftStart && end->type == ggSoftEnd) rangeTreeAdd(rangeTree, start->position, end->position); } /* Traverse graph again merging edges */ for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start= edge->start; struct vertex *end = edge->end; if (start->type == ggSoftStart && end->type == ggSoftEnd) { struct range *r = rangeTreeFindEnclosing(rangeTree, start->position, end->position); assert(r != NULL); /* At this point, r represents the bounds of a double-soft * region that encompasses this edge. Collect the set of * evidence of edges overlapping this range */ struct mergedEdge *mergeEdge = r->val; if (mergeEdge == NULL) { lmAllocVar(rangeTree->lm, mergeEdge); r->val = mergeEdge; } mergeEdge->evidence = slCat(edge->evList, mergeEdge->evidence); verbose(3, "Merging doubly-soft edge (%d,%d) into range (%d,%d)\n", start->position, end->position, r->start, r->end); edge->evList = NULL; rbTreeRemove(edgeTree, edge); } } /* Traverse merged edge list, making a single edge from each range. At this point, * each range will have some evidence attached to it, from each of the double softs * that fall within the range. 
From all of this evidence, make a single consensus edge */ struct range *r; struct lm *lm = lmInit(0); for (r = rangeTreeList(rangeTree); r != NULL; r = r->next) { struct mergedEdge *mergedEdge = r->val; struct edge *edge = edgeFromConsensusOfEvidence(vertexTree, mergedEdge->evidence, lm); if (edge != NULL) rbTreeAdd(edgeTree, edge); verbose(3, "Deriving edge (%d,%d) from all the double softs in range (%d,%d)\n", edge->start->position, edge->end->position, r->start, r->end); } /* Clean up and go home. */ lmCleanup(&lm); removeUnusedVertices(vertexTree, edgeTree); slFreeList(&edgeRefList); rbTreeFree(&rangeTree); }