struct hash *resultsToTreesMergeInline(struct sqlResult *sr) /* Given results of a sorted query on chrom,chromStart,chromEnd, store results * as rbTrees hashed by chrom. */ { struct hash *chromHash = newHash(18); char **row = NULL; struct rbTree *t = rbTreeNew(rangeCmp); char *prevChrom = NULL; struct range *prevR = NULL; while ((row = sqlNextRow(sr)) != NULL) { struct range *r = NULL; AllocVar(r); r->start = sqlUnsigned(row[1]); r->end = sqlUnsigned(row[2]); if (prevChrom == NULL) prevChrom = cloneString(row[0]); else if (! sameString(prevChrom, row[0])) { rbTreeAdd(t, prevR); addRbTree(prevChrom, chromHash, t); prevR = NULL; freeMem(prevChrom); prevChrom = cloneString(row[0]); t = rbTreeNew(rangeCmp); } if (prevR == NULL) prevR = r; else if (r->start <= prevR->end && prevR->start <= r->end) { /* Overlap: merge r into prevR & discard; prevR gets passed forward. */ if (r->end > prevR->end) prevR->end = r->end; if (r->start < prevR->start) prevR->start = r->start; freez(&r); } else { rbTreeAdd(t, prevR); prevR = r; } } if (prevChrom != NULL) { rbTreeAdd(t, prevR); addRbTree(prevChrom, chromHash, t); freeMem(prevChrom); } return chromHash; }
static struct rbTree *getNewRepeats(char *dirName, char *chrom) /* Read in repeatMasker .out line format file into a tree of ranges. */ /* Handles lineage-specific files that preserve header */ { struct rbTree *tree = rbTreeNew(simpleRangeCmp); struct simpleRange *range; char fileName[512]; struct lineFile *lf; char *row[7]; boolean headerDone = FALSE; sprintf(fileName, "%s/%s.out.spec", dirName, chrom); lf = lineFileOpen(fileName, TRUE); while (lineFileRow(lf, row)) { /* skip header lines (don't contain numeric first field) */ if (!headerDone && atoi(row[0]) == 0) continue; if (!sameString(chrom, row[4])) errAbort("Expecting %s word 5, line %d of %s\n", chrom, lf->lineIx, lf->fileName); headerDone = TRUE; lmAllocVar(tree->lm, range); range->start = lineFileNeedNum(lf, row, 5) - 1; range->end = lineFileNeedNum(lf, row, 6); rbTreeAdd(tree, range); } lineFileClose(&lf); return tree; }
static struct rbTree *makeVertexTree(struct linkedBeds *lbList) /* Make tree of unique vertices. */ { struct rbTree *vertexTree = rbTreeNew(vertexCmp); struct linkedBeds *lb; for (lb = lbList; lb != NULL; lb = lb->next) { struct bed *bed; for (bed = lb->bedList; bed != NULL; bed = bed->next) { /* Add very beginning and end, they'll be soft. */ addUniqueVertex(vertexTree, bed->chromStart, ggSoftStart); addUniqueVertex(vertexTree, bed->chromEnd, ggSoftEnd); /* Add internal hard ends. */ int i, lastBlock = bed->blockCount-1; for (i=0; i<lastBlock; ++i) { addUniqueVertex(vertexTree, bed->chromStart + bed->chromStarts[i] + bed->blockSizes[i], ggHardEnd); addUniqueVertex(vertexTree, bed->chromStart + bed->chromStarts[i+1], ggHardStart); } } } return vertexTree; }
static void getRepeatsTable(struct sqlConnection *conn, char *table, char *chrom, struct rbTree **retAllRepeats, struct rbTree **retNewRepeats) /* Return a tree of ranges for sequence gaps in chromosome from * specified table */ { struct sqlResult *sr; char **row; struct rbTree *allTree = rbTreeNew(simpleRangeCmp); struct rbTree *newTree = rbTreeNew(simpleRangeCmp); char query[256]; struct simpleRange *prevRange = NULL, *prevNewRange = NULL; sqlSafef(query, ArraySize(query), "select chromStart,chromEnd from %s " "where chrom = \"%s\"", table, chrom); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct simpleRange *range; lmAllocVar(allTree->lm, range); range->start = sqlUnsigned(row[0]); range->end = sqlUnsigned(row[1]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(allTree, prevRange); prevRange = range; } } if (prevRange != NULL) rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); sqlFreeResult(&sr); *retAllRepeats = allTree; *retNewRepeats = newTree; } /* static void getRepeatsTable() */
static struct visiSearcher *visiSearcherNew(int wordCount)
/* Allocate a visiSearcher, record wordCount in it, and give it a new tree
 * ordered by visiMatchCmpImageId.  Returns the new searcher. */
{
struct visiSearcher *newSearcher;

AllocVar(newSearcher);
newSearcher->wordCount = wordCount;
newSearcher->tree = rbTreeNew(visiMatchCmpImageId);
return newSearcher;
}
struct rbTree *wigIntoRangeTree(char *fileName)
/* Return a range tree full of wiggle records.  Sections are read from
 * fileName with wigSectionRead until it returns NULL, and each one is
 * added to a tree ordered by bedRangeCmp.  The file is closed before
 * returning. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct rbTree *wigTree = rbTreeNew(bedRangeCmp);
struct wigSection *section;

while ((section = wigSectionRead(lf)) != NULL)
    rbTreeAdd(wigTree, section);
/* Release the file handle now that every section has been read. */
lineFileClose(&lf);
return wigTree;
}
struct rbTree *rbTreeFromNetFile(char *fileName)
/* Build an rbTree from a net file.  A single chainNetRead call is made on
 * the file, and each element of that net's fillList (following the next
 * links only) is added to a tree ordered by cnFillRangeCmp.  If no net is
 * read the returned tree is empty.  The file is closed before returning. */
{
struct rbTree *rbTree = rbTreeNew(cnFillRangeCmp);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chainNet *cn = chainNetRead(lf);
struct cnFill *fill = NULL;

/* Guard against chainNetRead handing back no net rather than
 * dereferencing a NULL pointer. */
if (cn != NULL)
    {
    for (fill = cn->fillList; fill != NULL; fill = fill->next)
        rbTreeAdd(rbTree, fill);
    }
lineFileClose(&lf);
return rbTree;
}
boolean isUniqueCoordAndAgx(char *db, struct intronEv *iv, struct hash *posHash, struct hash *agxHash) /** Return TRUE if iv isn't in posHash and agxHash. Return FALSE otherwise. */ { static char key[1024]; static struct rbTree *bedTree = NULL; boolean unique = TRUE; struct bed *bed = NULL; if(bedTree == NULL) bedTree = rbTreeNew(bedRangeCmp); /* Unique location (don't pick same intron twice. */ if(bedUniqueInTree(bedTree, iv)) { AllocVar(bed); bed->chrom = cloneString(iv->chrom); bed->chromStart = iv->e1S; bed->chromEnd = iv->e2E; rbTreeAdd(bedTree, bed); } else unique = FALSE; /* Unique loci, don't pick from same overall loci if possible. */ safef(key, sizeof(key), "%s", iv->agxName); if(hashFindVal(agxHash, key) == NULL) hashAdd(agxHash, key, iv); else unique = FALSE; /* Definitely don't pick from same mRNA. */ chopSuffix(iv->ev->orthoBedName); safef(key, sizeof(key), "%s", iv->ev->orthoBedName); if(hashFindVal(agxHash, key) == NULL) hashAdd(agxHash, key, iv); else unique = FALSE; if(unique) unique = !checkMgcPicks(db, iv); return unique; }
static struct rbTree *makeEdgeTree(struct linkedBeds *lbList, struct rbTree *vertexTree) /* Make tree of unique edges. */ { struct rbTree *edgeTree = rbTreeNew(edgeCmp); struct linkedBeds *lb; for (lb = lbList; lb != NULL; lb = lb->next) { struct bed *bed, *nextBed; for (bed = lb->bedList; bed != NULL; bed = nextBed) { nextBed = bed->next; /* Loop to add all introns and all but last exon. */ struct vertex *start = matchingVertex(vertexTree, bed->chromStart, ggSoftStart); int i, lastBlock = bed->blockCount-1; for (i=0; i<lastBlock; ++i) { /* Add exon */ struct vertex *end = matchingVertex(vertexTree, start->position + bed->blockSizes[i], ggHardEnd); addUniqueEdge(edgeTree, start, end, lb); /* Add intron */ start = matchingVertex(vertexTree, bed->chromStart + bed->chromStarts[i+1], ggHardStart); addUniqueEdge(edgeTree, end, start, lb); } /* Add final exon */ struct vertex *end = matchingVertex(vertexTree, bed->chromEnd, ggSoftEnd); addUniqueEdge(edgeTree, start, end, lb); /* If there's another bed to go, add a soft intron connecting it. */ if (nextBed != NULL) { start = matchingVertex(vertexTree, nextBed->chromStart, ggSoftStart); addUniqueEdge(edgeTree, end, start, lb); } } } return edgeTree; }
void rbTest(int count) /* Fill up rbTree with count # of nodes and then search for those * nodes and then free it up. */ { int i, j; struct rbTree *tree = rbTreeNew(rbTreeCmpInt); struct lm *lm = tree->lm; for (i=0; i<count; ++i) { int *pt; lmAllocVar(lm, pt); *pt = i; rbTreeAdd(tree, pt); } for (j=0; j<10; ++j) for (i=0; i<count; ++i) if (!rbTreeFind(tree, &i)) errAbort("Couldnt' find %d", i); rbTreeFree(&tree); }
struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom) /* Return a tree of ranges for sequence gaps in chromosome */ { struct rbTree *tree = rbTreeNew(simpleRangeCmp); int rowOffset; struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct agpGap gap; struct simpleRange *range; agpGapStaticLoad(row+rowOffset, &gap); lmAllocVar(tree->lm, range); range->start = gap.chromStart; range->end = gap.chromEnd; rbTreeAdd(tree, range); } sqlFreeResult(&sr); return tree; }
void addRangeListAsRbTree(char *chrom, struct hash *chromHash,
        struct range *rangeList, boolean doSort)
/* Turn rangeList into an rbTree and store it in chromHash under chrom.
 * The list is sorted with rangeCmpStart first when doSort is set, then
 * passed through mergeOverlaps, and every remaining element is added to
 * the tree.  Aborts if chromHash already has an entry for chrom. */
{
struct rbTree *chromTree = rbTreeNew(rangeCmp);
struct range *cur = NULL;

/* A chrom seen before means the caller's input was not grouped by chrom. */
if (hashLookup(chromHash, chrom) != NULL)
    errAbort("resultsToTrees: need results ordered by chrom, "
             "but looks like they weren't for %s.", chrom);

if (doSort)
    slSort(&rangeList, rangeCmpStart);
mergeOverlaps(&rangeList);

cur = rangeList;
while (cur != NULL)
    {
    rbTreeAdd(chromTree, cur);
    cur = cur->next;
    }
hashAdd(chromHash, chrom, chromTree);
}
struct rbTree *getTrf(struct sqlConnection *conn, char *chrom) /* Return a tree of ranges for simple repeats in chromosome. */ { struct rbTree *tree = rbTreeNew(simpleRangeCmp); struct simpleRange *range, *prevRange = NULL; char query[256]; struct sqlResult *sr; char **row; sqlSafef(query, sizeof query, "select chromStart,chromEnd from simpleRepeat " "where chrom = '%s'", chrom); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { lmAllocVar(tree->lm, range); range->start = sqlUnsigned(row[0]); range->end = sqlUnsigned(row[1]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(tree, prevRange); prevRange = range; } } if (prevRange != NULL) rbTreeAdd(tree, prevRange); sqlFreeResult(&sr); return tree; }
static void getRepeats(struct sqlConnection *conn, struct hash *arHash, char *chrom, struct rbTree **retAllRepeats, struct rbTree **retNewRepeats) /* Return a tree of ranges for sequence gaps in chromosome */ { char *db = sqlGetDatabase(conn); struct sqlResult *sr; char **row; struct rbTree *allTree = rbTreeNew(simpleRangeCmp); struct rbTree *newTree = rbTreeNew(simpleRangeCmp); char tableName[64]; char query[256]; boolean splitRmsk = TRUE; struct simpleRange *prevRange = NULL, *prevNewRange = NULL; safef(tableName, sizeof(tableName), "%s_rmsk", chrom); if (! sqlTableExists(conn, tableName)) { safef(tableName, sizeof(tableName), "rmsk"); if (! sqlTableExists(conn, tableName)) errAbort("Can't find rmsk table for %s (%s.%s_rmsk or %s.rmsk)\n", chrom, db, chrom, db); splitRmsk = FALSE; } if (splitRmsk) sqlSafef(query, sizeof query, "select genoStart,genoEnd,repName,repClass,repFamily from %s", tableName); else sqlSafef(query, sizeof query, "select genoStart,genoEnd,repName,repClass,repFamily from %s " "where genoName = \"%s\"", tableName, chrom); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct simpleRange *range; char arKey[512]; lmAllocVar(allTree->lm, range); range->start = sqlUnsigned(row[0]); range->end = sqlUnsigned(row[1]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(allTree, prevRange); prevRange = range; } sprintf(arKey, "%s.%s.%s", row[2], row[3], row[4]); if (arHash != NULL && hashLookup(arHash, arKey)) { lmAllocVar(newTree->lm, range); range->start = sqlUnsigned(row[0]); range->end = sqlUnsigned(row[1]); if (prevNewRange == NULL) prevNewRange = range; else if (overlap(range, prevNewRange)) { /* merge r into prevR & discard; prevR gets passed forward. 
*/ if (range->end > prevNewRange->end) prevNewRange->end = range->end; if (range->start < prevNewRange->start) prevNewRange->start = range->start; } else { rbTreeAdd(allTree, prevNewRange); prevNewRange = range; } } } if (prevRange != NULL) rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); sqlFreeResult(&sr); *retAllRepeats = allTree; *retNewRepeats = newTree; }
struct rbTree *rangeTreeNew()
/* Return a newly created, empty rbTree whose ordering is rangeCmp. */
{
struct rbTree *tree = rbTreeNew(rangeCmp);
return tree;
}