void peakClusterMakerAddFromSource(struct peakClusterMaker *maker, struct peakSource *source) /* Read through data source and add items to it to rangeTrees in maker */ { struct hash *chromHash = maker->chromHash; struct lineFile *lf = lineFileOpen(source->dataSource, TRUE); struct lm *lm = chromHash->lm; /* Local memory pool - share with hash */ char *row[source->minColCount]; struct peakItem *item; char *line; while (lineFileNextReal(lf, &line)) { char *asciiLine = lmCloneString(lm, line); int wordCount = chopByWhite(line, row, source->minColCount); lineFileExpectAtLeast(lf, source->minColCount, wordCount); char *chrom = row[source->chromColIx]; struct hashEl *hel = hashLookup(chromHash, chrom); if (hel == NULL) { struct rbTree *tree = rangeTreeNewDetailed(lm, maker->stack); hel = hashAdd(chromHash, chrom, tree); } struct rbTree *tree = hel->val; lmAllocVar(lm, item); item->chrom = hel->name; item->chromStart = sqlUnsigned(row[source->startColIx]); item->chromEnd = sqlUnsigned(row[source->endColIx]); item->score = sqlDouble(row[source->scoreColIx]) * source->normFactor; if (item->score > 1000) item->score = 1000; item->source = source; item->asciiLine = asciiLine; rangeTreeAddValList(tree, item->chromStart, item->chromEnd, item); } lineFileClose(&lf); }
static void removeEnclosedDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxBleedOver, double singleExonMaxOverlap) /* Move double-softs that overlap spliced things to a very great extent into * the spliced things. Also remove tiny double-softs (no more than 2*maxBleedOver). */ { /* Traverse graph and build up range tree covering spliced exons. For each * range of overlapping exons, assemble a singly-linked list of all exons in * the range */ struct rbTree *rangeTree = rangeTreeNew(0); struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree); int removedCount = 0; for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start = edge->start; struct vertex *end = edge->end; if (start->type == ggHardStart || end->type == ggHardEnd) { rangeTreeAddValList(rangeTree, start->position, end->position, edge); } } /* Traverse graph yet one more time looking for doubly-soft exons * that are overlapping the spliced exons. */ for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start = edge->start; struct vertex *end = edge->end; if (start->type == ggSoftStart && end->type == ggSoftEnd) { int s = start->position; int e = end->position; int size = e - s; if (size <= maxBleedOver+maxBleedOver) { /* Tiny case, just remove edge and forget it. */ verbose(3, "Removing tiny double-soft edge from %d to %d\n", s, e); rbTreeRemove(edgeTree, edge); ++removedCount; } else { /* Normal case, look for exon list that encloses us, and * if any single exon in that list encloses us, merge into it. */ int splicedOverlap = rangeTreeOverlapSize(rangeTree, s, e); if (splicedOverlap > 0 && splicedOverlap > singleExonMaxOverlap*size) { if (!trustedEdge(edge)) { /* Once we find a range that overlaps the doubly-soft edge, find * (half-hard or better) edge from that range that encloses the * doubly soft edge. */ struct range *r = rangeTreeMaxOverlapping(rangeTree, s, e); struct edge *nextEdge, *edgeList = r->val; struct edge *enclosingEdge = NULL; for (nextEdge = edgeList; edgeList != NULL; edgeList = edgeList->next) { if (encloses(nextEdge, edge)) { enclosingEdge = nextEdge; } } if (enclosingEdge != NULL) { enclosingEdge->evList = slCat(enclosingEdge->evList, edge->evList); edge->evList = NULL; verbose(3, "Removing doubly-soft edge %d-%d, reassigning to %d-%d\n", s, e, enclosingEdge->start->position, enclosingEdge->end->position); rbTreeRemove(edgeTree, edge); ++removedCount; } } } } } } /* Clean up and go home. */ if (removedCount > 0) removeUnusedVertices(vertexTree, edgeTree); for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *nextEdge, *edge = edgeRef->val; while (edge != NULL) { nextEdge = edge->next; edge->next = NULL; edge = nextEdge; } } slFreeList(&edgeRefList); rbTreeFree(&rangeTree); }