void getTrfUnsplit(struct sqlConnection *conn, struct hash *chromHash) /* Return a tree of ranges for simple repeats in all chromosomes, * from a single query on the whole (unsplit) simpleRepeat table. */ { struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0])); struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); struct simpleRange *range, *prevRange = NULL; struct sqlResult *sr; char **row; char *prevChrom = NULL; sr = sqlGetResult(conn, "NOSQLINJ select chrom,chromStart,chromEnd from simpleRepeat" " order by chrom,chromStart"); while ((row = sqlNextRow(sr)) != NULL) { if (prevChrom == NULL) prevChrom = cloneString(row[0]); else if (! sameString(prevChrom, row[0])) { rbTreeAdd(tree, prevRange); setTrf(prevChrom, chromHash, tree); prevRange = NULL; freeMem(prevChrom); stack = lmAlloc(qLm, 256 * sizeof(stack[0])); tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); prevChrom = cloneString(row[0]); } lmAllocVar(tree->lm, range); range->start = sqlUnsigned(row[1]); range->end = sqlUnsigned(row[2]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(tree, prevRange); prevRange = range; } } if (prevChrom != NULL) { rbTreeAdd(tree, prevRange); setTrf(prevChrom, chromHash, tree); freeMem(prevChrom); } sqlFreeResult(&sr); }
void makeChroms(char *fileName, struct lm *lm, struct rbTreeNode **stack, struct hash **retHash, struct chrom **retList) /* Read size file and make chromosome structure for each element. */ { char *row[2]; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct hash *hash = newHash(0); struct chrom *chrom, *chromList = NULL; while (lineFileRow(lf, row)) { char *name = row[0]; if (hashLookup(hash, name) != NULL) errAbort("Duplicate %s in %s", name, fileName); AllocVar(chrom); slAddHead(&chromList, chrom); hashAddSaveName(hash, name, chrom, &chrom->name); chrom->size = lineFileNeedNum(lf, row, 1); chrom->spaces = rbTreeNewDetailed(spaceCmp, lm, stack); chrom->root = gapNew(0, chrom->size, 0, 0); addSpaceForGap(chrom, chrom->root); } lineFileClose(&lf); slReverse(&chromList); *retHash = hash; *retList = chromList; }
struct wordTree *wordTreeAddFollowing(struct wordTree *wt, char *word, struct lm *lm, struct rbTreeNode **stack) /* Make word follow wt in tree. If word already exists among followers * return it and bump use count. Otherwise create new one. */ { struct wordTree *w; /* Points to following element if any */ if (wt->following == NULL) { /* Allocate new if you've never seen it before. */ wt->following = rbTreeNewDetailed(wordTreeCmpWord, lm, stack); w = NULL; } else { /* Find word in existing tree */ struct wordTree key; key.word = word; w = rbTreeFind(wt->following, &key); } if (w == NULL) { w = wordTreeNew(word); rbTreeAdd(wt->following, w); } w->useCount += 1; return w; }
struct rbTree *rangeTreeNewDetailed(struct lm *lm, struct rbTreeNode *stack[128])
/* Create a range tree that uses the caller's existing local memory pool and
 * node stack instead of allocating its own.  Intended for programs that
 * build many trees and want to avoid the per-tree overhead.
 * Note: dispose of such a tree with freez(&rbTree), not rbFreeTree(&rbTree). */
{
struct rbTree *tree = rbTreeNewDetailed(rangeCmp, lm, stack);
return tree;
}
void getSeqGapsUnsplit(struct sqlConnection *conn, struct hash *chromHash) /* Return a tree of ranges for sequence gaps in all chromosomes, * assuming an unsplit gap table -- when the table is unsplit, it's * probably for a scaffold assembly where we *really* don't want * to do one query per scaffold! */ { struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0])); struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); int rowOffset = hOffsetPastBin(sqlGetDatabase(conn), NULL, "gap"); struct sqlResult *sr; char **row; char *prevChrom = NULL; sr = sqlGetResult(conn, "NOSQLINJ select * from gap order by chrom"); while ((row = sqlNextRow(sr)) != NULL) { struct agpGap gap; struct simpleRange *range; agpGapStaticLoad(row+rowOffset, &gap); if (prevChrom == NULL) prevChrom = cloneString(gap.chrom); else if (! sameString(prevChrom, gap.chrom)) { setNGap(prevChrom, chromHash, tree); freeMem(prevChrom); stack = lmAlloc(qLm, 256 * sizeof(stack[0])); tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); prevChrom = cloneString(gap.chrom); } lmAllocVar(tree->lm, range); range->start = gap.chromStart; range->end = gap.chromEnd; rbTreeAdd(tree, range); } if (prevChrom != NULL) { setNGap(prevChrom, chromHash, tree); freeMem(prevChrom); } sqlFreeResult(&sr); }
struct rbTree *rbTreeNew(int (*compare)(void *, void *))
/* Allocate a new, self-contained red-black tree and return a pointer to it.
 * compare() compares the keys of two items and returns a negative, zero,
 * or positive integer when the first item is respectively less than, equal
 * to, or greater than the second. */
{
/* The node stack spares the tree code from tracking explicit parent,
 * grandparent and great-grandparent pointers.  It only has to be as deep
 * as the tree can get, and because red-black trees are self-balancing
 * that is at most 2*log2(N).  A depth of 128 therefore covers up to
 * 2^64 items in the tree. */
enum { stackDepth = 128 };
struct lm *treeLm = lmInit(0);
struct rbTreeNode **nodeStack;

nodeStack = lmAlloc(treeLm, stackDepth * sizeof(nodeStack[0]));
return rbTreeNewDetailed(compare, treeLm, nodeStack);
}
static void getRepeatsUnsplitTable(struct sqlConnection *conn, struct hash *chromHash, char *table) /* Return a tree of ranges for sequence gaps all chromosomes, * from specified table */ { struct sqlResult *sr; char **row; struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0])); struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0])); struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack); char *prevChrom = NULL; struct simpleRange *prevRange = NULL, *prevNewRange = NULL; char query[256]; sqlSafef(query, ArraySize(query), "select chrom,chromStart,chromEnd from %s " "order by chrom,chromStart", table); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct simpleRange *range; if (prevChrom == NULL) prevChrom = cloneString(row[0]); else if (! sameString(prevChrom, row[0])) { rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); setRepeats(prevChrom, chromHash, allTree, newTree); freeMem(prevChrom); prevRange = prevNewRange = NULL; stack = lmAlloc(qLm, 256 * sizeof(stack[0])); allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); prevChrom = cloneString(row[0]); } lmAllocVar(allTree->lm, range); range->start = sqlUnsigned(row[1]); range->end = sqlUnsigned(row[2]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(allTree, prevRange); prevRange = range; } } if (prevChrom != NULL) { rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); setRepeats(prevChrom, chromHash, allTree, newTree); freeMem(prevChrom); } sqlFreeResult(&sr); } /* void getRepeatsUnsplitTable() */
static void getRepeatsUnsplit(struct sqlConnection *conn, struct hash *chromHash, struct hash *arHash) /* Return a tree of ranges for sequence gaps all chromosomes, * assuming an unsplit table -- when the table is unsplit, it's * probably for a scaffold assembly where we *really* don't want * to do one query per scaffold! */ { struct sqlResult *sr; char **row; struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0])); struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0])); struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack); char *prevChrom = NULL; struct simpleRange *prevRange = NULL, *prevNewRange = NULL; sr = sqlGetResult(conn, "NOSQLINJ select genoName,genoStart,genoEnd,repName,repClass,repFamily from rmsk " "order by genoName,genoStart"); while ((row = sqlNextRow(sr)) != NULL) { struct simpleRange *range; char arKey[512]; if (prevChrom == NULL) prevChrom = cloneString(row[0]); else if (! sameString(prevChrom, row[0])) { rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); setRepeats(prevChrom, chromHash, allTree, newTree); freeMem(prevChrom); prevRange = prevNewRange = NULL; stack = lmAlloc(qLm, 256 * sizeof(stack[0])); allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); if (arHash != NULL) { stack = lmAlloc(qLm, 256 * sizeof(stack[0])); newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack); } prevChrom = cloneString(row[0]); } lmAllocVar(allTree->lm, range); range->start = sqlUnsigned(row[1]); range->end = sqlUnsigned(row[2]); if (prevRange == NULL) prevRange = range; else if (overlap(range, prevRange)) { /* merge r into prevR & discard; prevR gets passed forward. 
*/ if (range->end > prevRange->end) prevRange->end = range->end; if (range->start < prevRange->start) prevRange->start = range->start; } else { rbTreeAdd(allTree, prevRange); prevRange = range; } sprintf(arKey, "%s.%s.%s", row[3], row[4], row[5]); if (arHash != NULL && hashLookup(arHash, arKey)) { lmAllocVar(newTree->lm, range); range->start = sqlUnsigned(row[1]); range->end = sqlUnsigned(row[2]); if (prevNewRange == NULL) prevNewRange = range; else if (overlap(range, prevNewRange)) { /* merge r into prevR & discard; prevR gets passed forward. */ if (range->end > prevNewRange->end) prevNewRange->end = range->end; if (range->start < prevNewRange->start) prevNewRange->start = range->start; } else { rbTreeAdd(newTree, prevNewRange); prevNewRange = range; } } } if (prevChrom != NULL) { rbTreeAdd(allTree, prevRange); if (prevNewRange != NULL) rbTreeAdd(newTree, prevNewRange); setRepeats(prevChrom, chromHash, allTree, newTree); freeMem(prevChrom); } sqlFreeResult(&sr); }