void getTrfUnsplit(struct sqlConnection *conn, struct hash *chromHash) /* Return a tree of ranges for simple repeats in all chromosomes, * from a single query on the whole (unsplit) simpleRepeat table. */ { struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0])); struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); struct simpleRange *range, *prevRange = NULL; struct sqlResult *sr; char **row; char *prevChrom = NULL; sr = sqlGetResult(conn, "NOSQLINJ select chrom,chromStart,chromEnd from simpleRepeat" " order by chrom,chromStart"); while ((row = sqlNextRow(sr)) != NULL) { if (prevChrom == NULL) prevChrom = cloneString(row[0]); else if (! sameString(prevChrom, row[0])) { rbTreeAdd(tree, prevRange); setTrf(prevChrom, chromHash, tree); prevRange = NULL; freeMem(prevChrom); stack = lmAlloc(qLm, 256 * sizeof(stack[0])); tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); prevChrom = cloneString(row[0]); } lmAllocVar(tree->lm, range); range->start = sqlUnsigned(row[1]); range->end = sqlUnsigned(row[2]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(tree, prevRange); prevRange = range; } } if (prevChrom != NULL) { rbTreeAdd(tree, prevRange); setTrf(prevChrom, chromHash, tree); freeMem(prevChrom); } sqlFreeResult(&sr); }
struct hash *resultsToTreesMergeInline(struct sqlResult *sr) /* Given results of a sorted query on chrom,chromStart,chromEnd, store results * as rbTrees hashed by chrom. */ { struct hash *chromHash = newHash(18); char **row = NULL; struct rbTree *t = rbTreeNew(rangeCmp); char *prevChrom = NULL; struct range *prevR = NULL; while ((row = sqlNextRow(sr)) != NULL) { struct range *r = NULL; AllocVar(r); r->start = sqlUnsigned(row[1]); r->end = sqlUnsigned(row[2]); if (prevChrom == NULL) prevChrom = cloneString(row[0]); else if (! sameString(prevChrom, row[0])) { rbTreeAdd(t, prevR); addRbTree(prevChrom, chromHash, t); prevR = NULL; freeMem(prevChrom); prevChrom = cloneString(row[0]); t = rbTreeNew(rangeCmp); } if (prevR == NULL) prevR = r; else if (r->start <= prevR->end && prevR->start <= r->end) { /* Overlap: merge r into prevR & discard; prevR gets passed forward. */ if (r->end > prevR->end) prevR->end = r->end; if (r->start < prevR->start) prevR->start = r->start; freez(&r); } else { rbTreeAdd(t, prevR); prevR = r; } } if (prevChrom != NULL) { rbTreeAdd(t, prevR); addRbTree(prevChrom, chromHash, t); freeMem(prevChrom); } return chromHash; }
struct wordTree *wordTreeAddFollowing(struct wordTree *wt, char *word, struct lm *lm, struct rbTreeNode **stack) /* Make word follow wt in tree. If word already exists among followers * return it and bump use count. Otherwise create new one. */ { struct wordTree *w; /* Points to following element if any */ if (wt->following == NULL) { /* Allocate new if you've never seen it before. */ wt->following = rbTreeNewDetailed(wordTreeCmpWord, lm, stack); w = NULL; } else { /* Find word in existing tree */ struct wordTree key; key.word = word; w = rbTreeFind(wt->following, &key); } if (w == NULL) { w = wordTreeNew(word); rbTreeAdd(wt->following, w); } w->useCount += 1; return w; }
static struct rbTree *getNewRepeats(char *dirName, char *chrom) /* Read in repeatMasker .out line format file into a tree of ranges. */ /* Handles lineage-specific files that preserve header */ { struct rbTree *tree = rbTreeNew(simpleRangeCmp); struct simpleRange *range; char fileName[512]; struct lineFile *lf; char *row[7]; boolean headerDone = FALSE; sprintf(fileName, "%s/%s.out.spec", dirName, chrom); lf = lineFileOpen(fileName, TRUE); while (lineFileRow(lf, row)) { /* skip header lines (don't contain numeric first field) */ if (!headerDone && atoi(row[0]) == 0) continue; if (!sameString(chrom, row[4])) errAbort("Expecting %s word 5, line %d of %s\n", chrom, lf->lineIx, lf->fileName); headerDone = TRUE; lmAllocVar(tree->lm, range); range->start = lineFileNeedNum(lf, row, 5) - 1; range->end = lineFileNeedNum(lf, row, 6); rbTreeAdd(tree, range); } lineFileClose(&lf); return tree; }
static void getRepeatsTable(struct sqlConnection *conn, char *table, char *chrom, struct rbTree **retAllRepeats, struct rbTree **retNewRepeats) /* Return a tree of ranges for sequence gaps in chromosome from * specified table */ { struct sqlResult *sr; char **row; struct rbTree *allTree = rbTreeNew(simpleRangeCmp); struct rbTree *newTree = rbTreeNew(simpleRangeCmp); char query[256]; struct simpleRange *prevRange = NULL, *prevNewRange = NULL; sqlSafef(query, ArraySize(query), "select chromStart,chromEnd from %s " "where chrom = \"%s\"", table, chrom); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct simpleRange *range; lmAllocVar(allTree->lm, range); range->start = sqlUnsigned(row[0]); range->end = sqlUnsigned(row[1]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(allTree, prevRange); prevRange = range; } } if (prevRange != NULL) rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); sqlFreeResult(&sr); *retAllRepeats = allTree; *retNewRepeats = newTree; } /* static void getRepeatsTable() */
struct rbTree *wigIntoRangeTree(char *fileName)
/* Open fileName, read wiggle sections from it with wigSectionRead() until
 * that returns NULL, and return a new range tree holding every section.
 * The line file is closed before returning. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct rbTree *wigTree = rbTreeNew(bedRangeCmp);
struct wigSection *section;

while ((section = wigSectionRead(lf)) != NULL)
    rbTreeAdd(wigTree, section);
/* Release the file handle and buffer; previously lf was leaked.
 * NOTE(review): assumes sections do not keep pointers into lf's buffer. */
lineFileClose(&lf);
return wigTree;
}
void addSpaceForGap(struct chrom *chrom, struct gap *gap)
/* Allocate a space that spans exactly gap's start..end, point it back at
 * gap, and add it to the chromosome's spaces rbTree. */
{
struct space *gapSpace;

AllocVar(gapSpace);
gapSpace->start = gap->start;
gapSpace->end = gap->end;
gapSpace->gap = gap;
rbTreeAdd(chrom->spaces, gapSpace);
}
struct rbTree *rbTreeFromNetFile(char *fileName)
/* Build an rbTree from a net file.  One net is read from fileName with a
 * single chainNetRead() call and every element on that net's fillList
 * (following the next pointers only) is added to the returned tree.
 * If no net could be read the returned tree is empty.
 * The line file is closed before returning. */
{
struct rbTree *rbTree = rbTreeNew(cnFillRangeCmp);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chainNet *cn = chainNetRead(lf);
struct cnFill *fill = NULL;

/* Guard against a file with no net in it rather than dereferencing NULL. */
if (cn != NULL)
    {
    for (fill = cn->fillList; fill != NULL; fill = fill->next)
	rbTreeAdd(rbTree, fill);
    }
/* Release the file handle and buffer; previously lf was leaked. */
lineFileClose(&lf);
return rbTree;
}
static void mergeOrAddEdge(struct rbTree *edgeTree, struct edge *edge)
/* Put edge into edgeTree unless the tree already holds a matching edge.
 * In that case the evidence list of edge is appended to the one on the
 * existing edge, and edge is left with an empty evidence list. */
{
struct edge *twin = rbTreeFind(edgeTree, edge);

if (twin == NULL)
    {
    rbTreeAdd(edgeTree, edge);
    return;
    }
twin->evList = slCat(twin->evList, edge->evList);
edge->evList = NULL;
}
static struct vertex *addUniqueVertex(struct rbTree *tree, int position,
	enum ggVertexType type)
/* Return the vertex in tree that matches position and type.  When there is
 * none, allocate one from the tree's local memory, add it, and return it. */
{
struct vertex *found = matchingVertex(tree, position, type);

if (found != NULL)
    return found;

lmAllocVar(tree->lm, found);
found->position = position;
found->type = type;
rbTreeAdd(tree, found);
return found;
}
struct rbTree *getTrf(struct sqlConnection *conn, char *chrom) /* Return a tree of ranges for simple repeats in chromosome. */ { struct rbTree *tree = rbTreeNew(simpleRangeCmp); struct simpleRange *range, *prevRange = NULL; char query[256]; struct sqlResult *sr; char **row; sqlSafef(query, sizeof query, "select chromStart,chromEnd from simpleRepeat " "where chrom = '%s'", chrom); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { lmAllocVar(tree->lm, range); range->start = sqlUnsigned(row[0]); range->end = sqlUnsigned(row[1]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(tree, prevRange); prevRange = range; } } if (prevRange != NULL) rbTreeAdd(tree, prevRange); sqlFreeResult(&sr); return tree; }
struct fill *fillSpace(struct chrom *chrom, struct space *space,
	struct chain *chain, struct cBlock *startBlock, boolean isQ)
/* Fill space with chain.  innerBounds() picks the filled interval s..e
 * inside space; if it reports failure NULL is returned and nothing changes.
 * Otherwise space is removed from the chromosome's spaces tree, a leftover
 * piece on either side is added back as a new space when it is at least
 * minSpace long, and the new fill is pushed onto the fillList of the gap
 * that space belongs to.  Returns the new fill. */
{
int s, e;
struct fill *newFill;
struct space *piece;

if (!innerBounds(startBlock, isQ, space->start, space->end, &s, &e))
    return NULL;
assert(s < e);

AllocVar(newFill);
newFill->start = s;
newFill->end = e;
newFill->chain = chain;

rbTreeRemove(chrom->spaces, space);

/* Leftover to the left of the fill. */
if (s - space->start >= minSpace)
    {
    AllocVar(piece);
    piece->gap = space->gap;
    piece->start = space->start;
    piece->end = s;
    rbTreeAdd(chrom->spaces, piece);
    }

/* Leftover to the right of the fill. */
if (space->end - e >= minSpace)
    {
    AllocVar(piece);
    piece->gap = space->gap;
    piece->start = e;
    piece->end = space->end;
    rbTreeAdd(chrom->spaces, piece);
    }

slAddHead(&space->gap->fillList, newFill);
return newFill;
}
static struct range *rangeTreeAddValHead(struct rbTree *tree, int start, int end, struct slName **newVal) { struct range *r, *existing; struct slName *head; AllocVar(r); r->start = start; r->end = end; r->val = *newVal; while ((existing = rbTreeRemove(tree, r))) { r->start = min(r->start, existing->start); r->end = max(r->end, existing->end); head = (struct slName *)(existing->val); slAddHead(&head, *newVal); r->val = head; } rbTreeAdd(tree, r); return r; }
static struct visiMatch *visiSearcherAdd(struct visiSearcher *searcher, int imageId, double weight, int startWord, int wordCount) /* Add given weight to match involving imageId, creating * a fresh match if necessary for imageId. */ { struct visiMatch key, *match; key.imageId = imageId; match = rbTreeFind(searcher->tree, &key); if (match == NULL) { match = visiMatchNew(imageId, searcher->wordCount); slAddHead(&searcher->matchList, match); rbTreeAdd(searcher->tree, match); } match->weight += weight; assert(startWord + wordCount <= searcher->wordCount); bitSetRange(match->wordBits, startWord, wordCount); return match; }
boolean isUniqueCoordAndAgx(char *db, struct intronEv *iv, struct hash *posHash, struct hash *agxHash) /** Return TRUE if iv isn't in posHash and agxHash. Return FALSE otherwise. */ { static char key[1024]; static struct rbTree *bedTree = NULL; boolean unique = TRUE; struct bed *bed = NULL; if(bedTree == NULL) bedTree = rbTreeNew(bedRangeCmp); /* Unique location (don't pick same intron twice. */ if(bedUniqueInTree(bedTree, iv)) { AllocVar(bed); bed->chrom = cloneString(iv->chrom); bed->chromStart = iv->e1S; bed->chromEnd = iv->e2E; rbTreeAdd(bedTree, bed); } else unique = FALSE; /* Unique loci, don't pick from same overall loci if possible. */ safef(key, sizeof(key), "%s", iv->agxName); if(hashFindVal(agxHash, key) == NULL) hashAdd(agxHash, key, iv); else unique = FALSE; /* Definitely don't pick from same mRNA. */ chopSuffix(iv->ev->orthoBedName); safef(key, sizeof(key), "%s", iv->ev->orthoBedName); if(hashFindVal(agxHash, key) == NULL) hashAdd(agxHash, key, iv); else unique = FALSE; if(unique) unique = !checkMgcPicks(db, iv); return unique; }
void rbTest(int count) /* Fill up rbTree with count # of nodes and then search for those * nodes and then free it up. */ { int i, j; struct rbTree *tree = rbTreeNew(rbTreeCmpInt); struct lm *lm = tree->lm; for (i=0; i<count; ++i) { int *pt; lmAllocVar(lm, pt); *pt = i; rbTreeAdd(tree, pt); } for (j=0; j<10; ++j) for (i=0; i<count; ++i) if (!rbTreeFind(tree, &i)) errAbort("Couldnt' find %d", i); rbTreeFree(&tree); }
void getSeqGapsUnsplit(struct sqlConnection *conn, struct hash *chromHash) /* Return a tree of ranges for sequence gaps in all chromosomes, * assuming an unsplit gap table -- when the table is unsplit, it's * probably for a scaffold assembly where we *really* don't want * to do one query per scaffold! */ { struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0])); struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); int rowOffset = hOffsetPastBin(sqlGetDatabase(conn), NULL, "gap"); struct sqlResult *sr; char **row; char *prevChrom = NULL; sr = sqlGetResult(conn, "NOSQLINJ select * from gap order by chrom"); while ((row = sqlNextRow(sr)) != NULL) { struct agpGap gap; struct simpleRange *range; agpGapStaticLoad(row+rowOffset, &gap); if (prevChrom == NULL) prevChrom = cloneString(gap.chrom); else if (! sameString(prevChrom, gap.chrom)) { setNGap(prevChrom, chromHash, tree); freeMem(prevChrom); stack = lmAlloc(qLm, 256 * sizeof(stack[0])); tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); prevChrom = cloneString(gap.chrom); } lmAllocVar(tree->lm, range); range->start = gap.chromStart; range->end = gap.chromEnd; rbTreeAdd(tree, range); } if (prevChrom != NULL) { setNGap(prevChrom, chromHash, tree); freeMem(prevChrom); } sqlFreeResult(&sr); }
struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom) /* Return a tree of ranges for sequence gaps in chromosome */ { struct rbTree *tree = rbTreeNew(simpleRangeCmp); int rowOffset; struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct agpGap gap; struct simpleRange *range; agpGapStaticLoad(row+rowOffset, &gap); lmAllocVar(tree->lm, range); range->start = gap.chromStart; range->end = gap.chromEnd; rbTreeAdd(tree, range); } sqlFreeResult(&sr); return tree; }
void addRangeListAsRbTree(char *chrom, struct hash *chromHash,
	struct range *rangeList, boolean doSort)
/* Turn rangeList into an rbTree and store it in chromHash under chrom.
 * The list is sorted first when doSort is set, then overlapping ranges are
 * merged by mergeOverlaps(), and the surviving ranges are added to the tree
 * one by one.  Aborts if chromHash already has an entry for chrom. */
{
struct rbTree *tree = rbTreeNew(rangeCmp);
struct range *cur = NULL;

if (hashLookup(chromHash, chrom) != NULL)
    errAbort("resultsToTrees: need results ordered by chrom, "
	     "but looks like they weren't for %s.", chrom);

if (doSort)
    slSort(&rangeList, rangeCmpStart);
mergeOverlaps(&rangeList);

cur = rangeList;
while (cur != NULL)
    {
    rbTreeAdd(tree, cur);
    cur = cur->next;
    }
hashAdd(chromHash, chrom, tree);
}
struct range *rangeTreeAddVal(struct rbTree *tree, int start, int end, void *val, void *(*mergeVals)(void *existingVal, void *newVal) ) /* Add range to tree, merging with existing ranges if need be. * If this is a new range, set the value to this val. * If there are existing items for this range, and if mergeVals function is not null, * apply mergeVals to the existing values and this new val, storing the result as the val * for this range (see rangeTreeAddValCount() and rangeTreeAddValList() below for examples). */ { struct range *r, *existing; r = lmAlloc(tree->lm, sizeof(*r)); /* alloc new zeroed range */ r->start = start; r->end = end; r->val = val; while ((existing = rbTreeRemove(tree, r)) != NULL) { r->start = min(r->start, existing->start); r->end = max(r->end, existing->end); if (mergeVals) r->val = mergeVals(existing->val, r->val); } rbTreeAdd(tree, r); return r; }
static struct edge *addUniqueEdge(struct rbTree *tree, struct vertex *start, struct vertex *end, struct linkedBeds *lb) /* Find existing edge if it exists. Otherwise create and return new one. * Regardless add lb as evidence to edge. */ { struct edge *e = matchingEdge(tree, start, end); if (e == NULL) { lmAllocVar(tree->lm, e); e->start = start; e->end = end; e->next = NULL; rbTreeAdd(tree, e); } struct evidence *ev; lmAllocVar(tree->lm, ev); ev->lb = lb; ev->start = start->position; ev->end = end->position; slAddHead(&e->evList, ev); return e; }
void rangeTreeAddToCoverageDepth(struct rbTree *tree, int start, int end) /* Add area from start to end to a tree that is being built up to store the * depth of coverage. Recover coverage back out by looking at ptToInt(range->val) * on tree elements. */ { struct range q; q.start = start; q.end = end; struct range *r, *existing = rbTreeFind(tree, &q); if (existing == NULL) { lmAllocVar(tree->lm, r); r->start = start; r->end = end; r->val = intToPt(1); rbTreeAdd(tree, r); } else { if (existing->start <= start && existing->end >= end) /* The existing one completely encompasses us */ { /* Make a new section for the bit before start. */ if (existing->start < start) { lmAllocVar(tree->lm, r); r->start = existing->start; r->end = start; r->val = existing->val; existing->start = start; rbTreeAdd(tree, r); } /* Make a new section for the bit after end. */ if (existing->end > end) { lmAllocVar(tree->lm, r); r->start = end; r->end = existing->end; r->val = existing->val; existing->end = end; rbTreeAdd(tree, r); } /* Increment existing section in overlapping area. */ existing->val = (char *)(existing->val) + 1; } else /* In general case fetch list of regions that overlap us. Remaining cases to handle are: r >> e rrrrrrrrrrrrrrrrrrrr eeeeeeeeee e < r rrrrrrrrrrrrrrr eeeeeeeeeeee r < e rrrrrrrrrrrr eeeeeeeeeeeee */ { struct range *existingList = rangeTreeAllOverlapping(tree, start, end); #ifdef DEBUG /* Make sure that list is really sorted for debugging... 
*/ int lastStart = existingList->start; for (r = existingList; r != NULL; r = r->next) { int start = r->start; if (start < lastStart) internalErr(); } #endif /* DEBUG */ int s = start, e = end; for (existing = existingList; existing != NULL; existing = existing->next) { /* Deal with start of new range that comes before existing */ if (s < existing->start) { lmAllocVar(tree->lm, r); r->start = s; r->end = existing->start; r->val = intToPt(1); s = existing->start; rbTreeAdd(tree, r); } else if (s > existing->start) { lmAllocVar(tree->lm, r); r->start = existing->start; r->end = s; r->val = existing->val; existing->start = s; rbTreeAdd(tree, r); } existing->val = (char *)(existing->val) + 1; s = existing->end; } if (s < e) /* Deal with end of new range that doesn't overlap with anything. */ { lmAllocVar(tree->lm, r); r->start = s; r->end = e; r->val = intToPt(1); rbTreeAdd(tree, r); } } } }
static void getRepeats(struct sqlConnection *conn, struct hash *arHash, char *chrom, struct rbTree **retAllRepeats, struct rbTree **retNewRepeats) /* Return a tree of ranges for sequence gaps in chromosome */ { char *db = sqlGetDatabase(conn); struct sqlResult *sr; char **row; struct rbTree *allTree = rbTreeNew(simpleRangeCmp); struct rbTree *newTree = rbTreeNew(simpleRangeCmp); char tableName[64]; char query[256]; boolean splitRmsk = TRUE; struct simpleRange *prevRange = NULL, *prevNewRange = NULL; safef(tableName, sizeof(tableName), "%s_rmsk", chrom); if (! sqlTableExists(conn, tableName)) { safef(tableName, sizeof(tableName), "rmsk"); if (! sqlTableExists(conn, tableName)) errAbort("Can't find rmsk table for %s (%s.%s_rmsk or %s.rmsk)\n", chrom, db, chrom, db); splitRmsk = FALSE; } if (splitRmsk) sqlSafef(query, sizeof query, "select genoStart,genoEnd,repName,repClass,repFamily from %s", tableName); else sqlSafef(query, sizeof query, "select genoStart,genoEnd,repName,repClass,repFamily from %s " "where genoName = \"%s\"", tableName, chrom); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct simpleRange *range; char arKey[512]; lmAllocVar(allTree->lm, range); range->start = sqlUnsigned(row[0]); range->end = sqlUnsigned(row[1]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(allTree, prevRange); prevRange = range; } sprintf(arKey, "%s.%s.%s", row[2], row[3], row[4]); if (arHash != NULL && hashLookup(arHash, arKey)) { lmAllocVar(newTree->lm, range); range->start = sqlUnsigned(row[0]); range->end = sqlUnsigned(row[1]); if (prevNewRange == NULL) prevNewRange = range; else if (overlap(range, prevNewRange)) { /* merge r into prevR & discard; prevR gets passed forward. 
*/ if (range->end > prevNewRange->end) prevNewRange->end = range->end; if (range->start < prevNewRange->start) prevNewRange->start = range->start; } else { rbTreeAdd(allTree, prevNewRange); prevNewRange = range; } } } if (prevRange != NULL) rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); sqlFreeResult(&sr); *retAllRepeats = allTree; *retNewRepeats = newTree; }
static void getRepeatsUnsplitTable(struct sqlConnection *conn, struct hash *chromHash, char *table) /* Return a tree of ranges for sequence gaps all chromosomes, * from specified table */ { struct sqlResult *sr; char **row; struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0])); struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0])); struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack); char *prevChrom = NULL; struct simpleRange *prevRange = NULL, *prevNewRange = NULL; char query[256]; sqlSafef(query, ArraySize(query), "select chrom,chromStart,chromEnd from %s " "order by chrom,chromStart", table); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct simpleRange *range; if (prevChrom == NULL) prevChrom = cloneString(row[0]); else if (! sameString(prevChrom, row[0])) { rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); setRepeats(prevChrom, chromHash, allTree, newTree); freeMem(prevChrom); prevRange = prevNewRange = NULL; stack = lmAlloc(qLm, 256 * sizeof(stack[0])); allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); prevChrom = cloneString(row[0]); } lmAllocVar(allTree->lm, range); range->start = sqlUnsigned(row[1]); range->end = sqlUnsigned(row[2]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(allTree, prevRange); prevRange = range; } } if (prevChrom != NULL) { rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); setRepeats(prevChrom, chromHash, allTree, newTree); freeMem(prevChrom); } sqlFreeResult(&sr); } /* void getRepeatsUnsplitTable() */
static void getRepeatsUnsplit(struct sqlConnection *conn, struct hash *chromHash, struct hash *arHash) /* Return a tree of ranges for sequence gaps all chromosomes, * assuming an unsplit table -- when the table is unsplit, it's * probably for a scaffold assembly where we *really* don't want * to do one query per scaffold! */ { struct sqlResult *sr; char **row; struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0])); struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0])); struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack); char *prevChrom = NULL; struct simpleRange *prevRange = NULL, *prevNewRange = NULL; sr = sqlGetResult(conn, "NOSQLINJ select genoName,genoStart,genoEnd,repName,repClass,repFamily from rmsk " "order by genoName,genoStart"); while ((row = sqlNextRow(sr)) != NULL) { struct simpleRange *range; char arKey[512]; if (prevChrom == NULL) prevChrom = cloneString(row[0]); else if (! sameString(prevChrom, row[0])) { rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); setRepeats(prevChrom, chromHash, allTree, newTree); freeMem(prevChrom); prevRange = prevNewRange = NULL; stack = lmAlloc(qLm, 256 * sizeof(stack[0])); allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); if (arHash != NULL) { stack = lmAlloc(qLm, 256 * sizeof(stack[0])); newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); } prevChrom = cloneString(row[0]); } lmAllocVar(allTree->lm, range); range->start = sqlUnsigned(row[1]); range->end = sqlUnsigned(row[2]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. 
*/ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(allTree, prevRange); prevRange = range; } sprintf(arKey, "%s.%s.%s", row[3], row[4], row[5]); if (arHash != NULL && hashLookup(arHash, arKey)) { lmAllocVar(newTree->lm, range); range->start = sqlUnsigned(row[1]); range->end = sqlUnsigned(row[2]); if (prevNewRange == NULL) prevNewRange = range; else if (overlap(range, prevNewRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevNewRange->end) prevNewRange->end = range->end; if (range->start < prevNewRange->start) prevNewRange->start = range->start; } else { rbTreeAdd(newTree, prevNewRange); prevNewRange = range; } } } if (prevChrom != NULL) { rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); setRepeats(prevChrom, chromHash, allTree, newTree); freeMem(prevChrom); } sqlFreeResult(&sr); }
static void mergeDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree) /* Merge together overlapping edges with soft ends. */ { struct mergedEdge /* Hold together info on a merged edge. */ { struct evidence *evidence; }; /* Traverse graph and build up range tree. Each node in the range tree * will represent the bounds of coordinates of overlapping double softs */ struct rbTree *rangeTree = rangeTreeNew(0); struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree); for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start = edge->start; struct vertex *end = edge->end; if (start->type == ggSoftStart && end->type == ggSoftEnd) rangeTreeAdd(rangeTree, start->position, end->position); } /* Traverse graph again merging edges */ for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start= edge->start; struct vertex *end = edge->end; if (start->type == ggSoftStart && end->type == ggSoftEnd) { struct range *r = rangeTreeFindEnclosing(rangeTree, start->position, end->position); assert(r != NULL); /* At this point, r represents the bounds of a double-soft * region that encompasses this edge. Collect the set of * evidence of edges overlapping this range */ struct mergedEdge *mergeEdge = r->val; if (mergeEdge == NULL) { lmAllocVar(rangeTree->lm, mergeEdge); r->val = mergeEdge; } mergeEdge->evidence = slCat(edge->evList, mergeEdge->evidence); verbose(3, "Merging doubly-soft edge (%d,%d) into range (%d,%d)\n", start->position, end->position, r->start, r->end); edge->evList = NULL; rbTreeRemove(edgeTree, edge); } } /* Traverse merged edge list, making a single edge from each range. At this point, * each range will have some evidence attached to it, from each of the double softs * that fall within the range. 
From all of this evidence, make a single consensus edge */ struct range *r; struct lm *lm = lmInit(0); for (r = rangeTreeList(rangeTree); r != NULL; r = r->next) { struct mergedEdge *mergedEdge = r->val; struct edge *edge = edgeFromConsensusOfEvidence(vertexTree, mergedEdge->evidence, lm); if (edge != NULL) rbTreeAdd(edgeTree, edge); verbose(3, "Deriving edge (%d,%d) from all the double softs in range (%d,%d)\n", edge->start->position, edge->end->position, r->start, r->end); } /* Clean up and go home. */ lmCleanup(&lm); removeUnusedVertices(vertexTree, edgeTree); slFreeList(&edgeRefList); rbTreeFree(&rangeTree); }