예제 #1
0
void getTrfUnsplit(struct sqlConnection *conn, struct hash *chromHash)
/* Read the whole (unsplit) simpleRepeat table with one query ordered by
 * chrom,chromStart.  For each chromosome build a tree of ranges in which
 * consecutive overlapping rows are merged, and hand the finished tree to
 * setTrf() together with chromHash.  Nothing is returned directly. */
{
struct rbTreeNode **nodeStack = lmAlloc(qLm, 256 * sizeof(nodeStack[0]));
struct rbTree *curTree = rbTreeNewDetailed(simpleRangeCmp, qLm, nodeStack);
struct simpleRange *pending = NULL;  /* merged range not yet added to curTree */
char *curChrom = NULL;               /* chromosome curTree is being built for */
char **row;
struct sqlResult *sr = sqlGetResult(conn,
    "NOSQLINJ select chrom,chromStart,chromEnd from simpleRepeat"
    " order by chrom,chromStart");

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *cur;

    if (curChrom != NULL && !sameString(curChrom, row[0]))
        {
        /* Chromosome changed: flush what we have and start a fresh tree. */
        rbTreeAdd(curTree, pending);
        setTrf(curChrom, chromHash, curTree);
        pending = NULL;
        freeMem(curChrom);
        curChrom = NULL;
        nodeStack = lmAlloc(qLm, 256 * sizeof(nodeStack[0]));
        curTree = rbTreeNewDetailed(simpleRangeCmp, qLm, nodeStack);
        }
    if (curChrom == NULL)
        curChrom = cloneString(row[0]);

    lmAllocVar(curTree->lm, cur);
    cur->start = sqlUnsigned(row[1]);
    cur->end = sqlUnsigned(row[2]);

    if (pending != NULL && overlap(cur, pending))
        {
        /* Grow the pending range to cover cur; cur itself is dropped. */
        if (cur->end > pending->end)
            pending->end = cur->end;
        if (cur->start < pending->start)
            pending->start = cur->start;
        }
    else
        {
        if (pending != NULL)
            rbTreeAdd(curTree, pending);
        pending = cur;
        }
    }

/* Flush the last chromosome, if there was any row at all. */
if (curChrom != NULL)
    {
    rbTreeAdd(curTree, pending);
    setTrf(curChrom, chromHash, curTree);
    freeMem(curChrom);
    }
sqlFreeResult(&sr);
}
struct hash *resultsToTreesMergeInline(struct sqlResult *sr)
/* Consume rows of chrom,chromStart,chromEnd that are expected to arrive
 * sorted, merging overlapping neighbours as they are read.  Each chromosome
 * gets its own rbTree of merged ranges, registered through addRbTree() in
 * the hash that is returned. */
{
struct hash *treesByChrom = newHash(18);
struct rbTree *curTree = rbTreeNew(rangeCmp);
struct range *pending = NULL;   /* merged range not yet added to curTree */
char *curChrom = NULL;
char **row;

for (row = sqlNextRow(sr);  row != NULL;  row = sqlNextRow(sr))
    {
    struct range *cur;
    AllocVar(cur);
    cur->start = sqlUnsigned(row[1]);
    cur->end = sqlUnsigned(row[2]);

    if (curChrom != NULL && !sameString(curChrom, row[0]))
        {
        /* New chromosome: store the finished tree and begin another. */
        rbTreeAdd(curTree, pending);
        addRbTree(curChrom, treesByChrom, curTree);
        pending = NULL;
        freeMem(curChrom);
        curChrom = NULL;
        curTree = rbTreeNew(rangeCmp);
        }
    if (curChrom == NULL)
        curChrom = cloneString(row[0]);

    if (pending == NULL)
        pending = cur;
    else if (cur->start > pending->end || pending->start > cur->end)
        {
        /* No overlap: pending is complete, cur becomes the new pending. */
        rbTreeAdd(curTree, pending);
        pending = cur;
        }
    else
        {
        /* Overlap: widen pending to cover cur, then discard cur. */
        if (cur->end > pending->end)
            pending->end = cur->end;
        if (cur->start < pending->start)
            pending->start = cur->start;
        freez(&cur);
        }
    }

if (curChrom != NULL)
    {
    rbTreeAdd(curTree, pending);
    addRbTree(curChrom, treesByChrom, curTree);
    freeMem(curChrom);
    }
return treesByChrom;
}
예제 #3
0
파일: wordChain.c 프로젝트: bowhan/kent
struct wordTree *wordTreeAddFollowing(struct wordTree *wt, char *word, 
	struct lm *lm, struct rbTreeNode **stack)
/* Make word follow wt in tree.  If word already exists among followers
 * return it and bump use count.  Otherwise create new one. */
{
struct wordTree *w;   /* Points to following element if any */
if (wt->following == NULL)
    {
    /* Allocate new if you've never seen it before. */
    wt->following = rbTreeNewDetailed(wordTreeCmpWord, lm, stack);
    w = NULL;
    }
else
    {
    /* Find word in existing tree */
    struct wordTree key;
    key.word = word;
    w = rbTreeFind(wt->following, &key);
    }
if (w == NULL)
    {
    w = wordTreeNew(word);
    rbTreeAdd(wt->following, w);
    }
w->useCount += 1;
return w;
}
예제 #4
0
static struct rbTree *getNewRepeats(char *dirName, char *chrom)
/* Read the repeatMasker .out style file dirName/chrom.out.spec into a tree
 * of ranges and return the tree.  Header lines, which lineage-specific files
 * preserve, are skipped: any leading line whose first field does not parse
 * to a non-zero number is ignored until the first data line is seen.
 * Aborts if the file name does not fit the local buffer or if word 5 of a
 * data line is not chrom. */
{
struct rbTree *tree = rbTreeNew(simpleRangeCmp);
struct simpleRange *range;
char fileName[512];
struct lineFile *lf;
char *row[7];
boolean headerDone = FALSE;
int nameLen;

/* Bounded formatting: dirName and chrom come from the caller and an
 * unchecked sprintf() could run past the end of fileName. */
nameLen = snprintf(fileName, sizeof(fileName), "%s/%s.out.spec", dirName, chrom);
if (nameLen < 0 || nameLen >= (int)sizeof(fileName))
    errAbort("File name too long: %s/%s.out.spec", dirName, chrom);

lf = lineFileOpen(fileName, TRUE);
while (lineFileRow(lf, row))
    {
    /* skip header lines (don't contain numeric first field) */
    if (!headerDone && atoi(row[0]) == 0)
        continue;
    if (!sameString(chrom, row[4]))
        errAbort("Expecting %s word 5, line %d of %s\n", 
		chrom, lf->lineIx, lf->fileName);
    headerDone = TRUE;
    lmAllocVar(tree->lm, range);
    /* Word 6 minus one becomes the range start; word 7 is used as the end. */
    range->start = lineFileNeedNum(lf, row, 5) - 1;
    range->end = lineFileNeedNum(lf, row, 6);
    rbTreeAdd(tree, range);
    }
lineFileClose(&lf);
return tree;
}
예제 #5
0
static void getRepeatsTable(struct sqlConnection *conn, char *table,
    char *chrom, struct rbTree **retAllRepeats,
	struct rbTree **retNewRepeats)
/* Query table for chromStart,chromEnd of every row on chrom and return the
 * ranges, with consecutive overlapping rows merged, as a tree in
 * *retAllRepeats.  Only coordinates are selected here, so no row is ever
 * classified as a "new" repeat: *retNewRepeats is always an empty tree. */
{
struct sqlResult *sr;
char **row;
struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
char query[256];
struct simpleRange *prevRange = NULL;  /* merged range not yet in allTree */

sqlSafef(query, ArraySize(query), "select chromStart,chromEnd from %s "
	    "where chrom = \"%s\"", table, chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[0]);
    range->end = sqlUnsigned(row[1]);
    if (prevRange == NULL)
        prevRange = range;
    else if (overlap(range, prevRange))
        {
        /* Merge range into prevRange and drop it; prevRange carries on. */
        if (range->end > prevRange->end)
            prevRange->end = range->end;
        if (range->start < prevRange->start)
            prevRange->start = range->start;
        }
    else
        {
        rbTreeAdd(allTree, prevRange);
        prevRange = range;
        }
    }
if (prevRange != NULL)
    rbTreeAdd(allTree, prevRange);
sqlFreeResult(&sr);
*retAllRepeats = allTree;
*retNewRepeats = newTree;
}	/*	static void getRepeatsTable()	*/
struct rbTree *wigIntoRangeTree(char *fileName)
/* Read every wiggle section from fileName with wigSectionRead() and return
 * them in a range tree ordered by bedRangeCmp.  The file is closed before
 * returning. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct rbTree *wigTree = rbTreeNew(bedRangeCmp);
    struct wigSection *section;
    while ((section = wigSectionRead(lf)) != NULL)
        rbTreeAdd(wigTree, section);
    /* Release the file handle; without this every call leaks an open file. */
    lineFileClose(&lf);
    return wigTree;
}
예제 #7
0
void addSpaceForGap(struct chrom *chrom, struct gap *gap)
/* Allocate a space that spans exactly the same start..end as gap, link it
 * back to gap, and insert it into the chromosome's rbTree of spaces. */
{
struct space *gapSpace;
AllocVar(gapSpace);
gapSpace->start = gap->start;
gapSpace->end = gap->end;
gapSpace->gap = gap;
rbTreeAdd(chrom->spaces, gapSpace);
}
예제 #8
0
struct rbTree *rbTreeFromNetFile(char *fileName)
/* Build an rbTree (ordered by cnFillRangeCmp) holding the entries of the
 * fillList of the first net that chainNetRead() returns from fileName.
 * Only that one chainNetRead() call is made and only the fillList chain is
 * walked.  Returns an empty tree when no net could be read. */
{
struct rbTree *rbTree = rbTreeNew(cnFillRangeCmp);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chainNet *cn = chainNetRead(lf);
struct cnFill *fill = NULL;

/* Guard against a file without any net before touching cn->fillList. */
if (cn != NULL)
    {
    for (fill = cn->fillList; fill != NULL; fill = fill->next)
        rbTreeAdd(rbTree, fill);
    }
/* Release the file handle; without this every call leaks an open file. */
lineFileClose(&lf);
return rbTree;
}
예제 #9
0
static void mergeOrAddEdge(struct rbTree *edgeTree, struct edge *edge)
/* Put edge into edgeTree unless the tree already holds a matching edge.
 * In that case the evidence list of edge is appended to the existing
 * edge's list and edge is left with an empty evidence list. */
{
struct edge *match = rbTreeFind(edgeTree, edge);
if (match == NULL)
    {
    rbTreeAdd(edgeTree, edge);
    return;
    }
match->evList = slCat(match->evList, edge->evList);
edge->evList = NULL;
}
예제 #10
0
static struct vertex *addUniqueVertex(struct rbTree *tree, int position, enum ggVertexType type)
/* Return the vertex in tree that matchingVertex() finds for position and
 * type.  If there is none, allocate one from the tree's local memory, add it
 * to the tree and return it. */
{
struct vertex *vertex = matchingVertex(tree, position, type);
if (vertex != NULL)
    return vertex;

lmAllocVar(tree->lm, vertex);
vertex->position = position;
vertex->type = type;
rbTreeAdd(tree, vertex);
return vertex;
}
예제 #11
0
struct rbTree *getTrf(struct sqlConnection *conn, char *chrom)
/* Query simpleRepeat for the rows on chrom and return a tree of their
 * chromStart..chromEnd ranges.  A row that overlaps the range built from
 * the rows just before it is folded into that range rather than added
 * separately.  The query itself requests no particular row order. */
{
struct rbTree *trfTree = rbTreeNew(simpleRangeCmp);
struct simpleRange *pending = NULL;  /* merged range not yet in trfTree */
char query[256];
struct sqlResult *sr;
char **row;

sqlSafef(query, sizeof query, "select chromStart,chromEnd from simpleRepeat "
               "where chrom = '%s'",
	       chrom);
sr = sqlGetResult(conn, query);
for (row = sqlNextRow(sr);  row != NULL;  row = sqlNextRow(sr))
    {
    struct simpleRange *cur;
    lmAllocVar(trfTree->lm, cur);
    cur->start = sqlUnsigned(row[0]);
    cur->end = sqlUnsigned(row[1]);

    if (pending != NULL && overlap(cur, pending))
        {
        /* Widen pending to cover cur; cur itself is dropped. */
        if (cur->end > pending->end)
            pending->end = cur->end;
        if (cur->start < pending->start)
            pending->start = cur->start;
        }
    else
        {
        if (pending != NULL)
            rbTreeAdd(trfTree, pending);
        pending = cur;
        }
    }
if (pending != NULL)
    rbTreeAdd(trfTree, pending);
sqlFreeResult(&sr);
return trfTree;
}
예제 #12
0
struct fill *fillSpace(struct chrom *chrom, struct space *space, 
	struct chain *chain, struct cBlock *startBlock, 
	boolean isQ)
/* Fill part of space with chain.  innerBounds() decides which part; when it
 * reports nothing usable, NULL is returned and nothing is changed.
 * Otherwise space is removed from the chromosome's space tree (it is not
 * freed here), left-over pieces of at least minSpace on either side are
 * added back as new spaces, and the new fill is pushed onto the fill list
 * of the gap that space belongs to.  Returns the new fill. */
{
int fillStart, fillEnd;
struct fill *fill;

if (!innerBounds(startBlock, isQ, space->start, space->end, &fillStart, &fillEnd))
    return NULL;
assert(fillStart < fillEnd);

AllocVar(fill);
fill->chain = chain;
fill->start = fillStart;
fill->end = fillEnd;

rbTreeRemove(chrom->spaces, space);

/* Remainder to the left of the fill. */
if (fillStart - space->start >= minSpace)
    {
    struct space *left;
    AllocVar(left);
    left->start = space->start;
    left->end = fillStart;
    left->gap = space->gap;
    rbTreeAdd(chrom->spaces, left);
    }

/* Remainder to the right of the fill. */
if (space->end - fillEnd >= minSpace)
    {
    struct space *right;
    AllocVar(right);
    right->start = fillEnd;
    right->end = space->end;
    right->gap = space->gap;
    rbTreeAdd(chrom->spaces, right);
    }

slAddHead(&space->gap->fillList, fill);
return fill;
}
예제 #13
0
static struct range *rangeTreeAddValHead(struct rbTree *tree, int start, int end, struct slName **newVal) {
	struct range *r, *existing;
	struct slName *head;
	AllocVar(r);
	r->start = start;
	r->end = end;
	r->val = *newVal;
	while ((existing = rbTreeRemove(tree, r))) {
    		r->start = min(r->start, existing->start);
		r->end = max(r->end, existing->end);
		head = (struct slName *)(existing->val);
   		slAddHead(&head, *newVal);
		r->val = head;
	}
	rbTreeAdd(tree, r);
	return r;
}
static struct visiMatch *visiSearcherAdd(struct visiSearcher *searcher,
	int imageId, double weight, int startWord, int wordCount)
/* Add given weight to match involving imageId,  creating
 * a fresh match if necessary for imageId. */
{
struct visiMatch key, *match;
key.imageId = imageId;
match = rbTreeFind(searcher->tree, &key);
if (match == NULL)
    {
    match = visiMatchNew(imageId, searcher->wordCount);
    slAddHead(&searcher->matchList, match);
    rbTreeAdd(searcher->tree, match);
    }
match->weight += weight;
assert(startWord + wordCount <= searcher->wordCount);
bitSetRange(match->wordBits, startWord, wordCount);
return match;
}
boolean isUniqueCoordAndAgx(char *db, struct intronEv *iv, struct hash *posHash, struct hash *agxHash)
/** Return TRUE only if iv passes all of these checks:
     - bedUniqueInTree() accepts its location against the locations seen so
       far (kept in a function-static tree),
     - its agxName is not yet in agxHash,
     - its orthoBedName, with suffix chopped, is not yet in agxHash,
     - checkMgcPicks() does not flag it.
   Every check that passes records iv, even when another check fails.
   The orthoBedName is modified in place by chopSuffix().
   posHash is not used. */
{
static char key[1024];
static struct rbTree *bedTree = NULL;
boolean seenBefore = FALSE;

if (bedTree == NULL)
    bedTree = rbTreeNew(bedRangeCmp);

/* Location: never pick the same intron twice. */
if (!bedUniqueInTree(bedTree, iv))
    seenBefore = TRUE;
else
    {
    struct bed *bed = NULL;
    AllocVar(bed);
    bed->chrom = cloneString(iv->chrom);
    bed->chromStart = iv->e1S;
    bed->chromEnd = iv->e2E;
    rbTreeAdd(bedTree, bed);
    }

/* Locus: avoid picking twice from the same overall locus. */
safef(key, sizeof(key), "%s", iv->agxName);
if (hashFindVal(agxHash, key) != NULL)
    seenBefore = TRUE;
else
    hashAdd(agxHash, key, iv);

/* mRNA: never pick twice from the same mRNA. */
chopSuffix(iv->ev->orthoBedName);
safef(key, sizeof(key), "%s", iv->ev->orthoBedName);
if (hashFindVal(agxHash, key) != NULL)
    seenBefore = TRUE;
else
    hashAdd(agxHash, key, iv);

if (seenBefore)
    return FALSE;
return !checkMgcPicks(db, iv);
}
예제 #16
0
void rbTest(int count)
/* Exercise the rbTree: insert the integers 0..count-1, look each of them up
 * ten times over (aborting on any miss), then free the tree. */
{
    struct rbTree *tree = rbTreeNew(rbTreeCmpInt);
    struct lm *lm = tree->lm;
    int value, pass;

    /* Fill: each integer lives in the tree's own local memory. */
    for (value = 0; value < count; value++)
    {
        int *stored;
        lmAllocVar(lm, stored);
        *stored = value;
        rbTreeAdd(tree, stored);
    }

    /* Search: repeat the full sweep ten times. */
    for (pass = 0; pass < 10; pass++)
    {
        for (value = 0; value < count; value++)
        {
            if (!rbTreeFind(tree, &value))
                errAbort("Couldnt' find %d", value);
        }
    }
    rbTreeFree(&tree);
}
예제 #17
0
void getSeqGapsUnsplit(struct sqlConnection *conn, struct hash *chromHash)
/* Read the whole unsplit gap table with a single query ordered by chrom,
 * build one tree of gap ranges per chromosome, and hand each finished tree
 * to setNGap() together with chromHash.  One query for everything avoids
 * issuing a query per scaffold on scaffold assemblies.  Nothing is returned
 * directly. */
{
struct rbTreeNode **nodeStack = lmAlloc(qLm, 256 * sizeof(nodeStack[0]));
struct rbTree *curTree = rbTreeNewDetailed(simpleRangeCmp, qLm, nodeStack);
int binOffset = hOffsetPastBin(sqlGetDatabase(conn), NULL, "gap");
char *curChrom = NULL;   /* chromosome curTree is being built for */
char **row;
struct sqlResult *sr = sqlGetResult(conn, "NOSQLINJ select * from gap order by chrom");

for (row = sqlNextRow(sr);  row != NULL;  row = sqlNextRow(sr))
    {
    struct agpGap gap;
    struct simpleRange *gapRange;

    agpGapStaticLoad(row+binOffset, &gap);
    if (curChrom != NULL && !sameString(curChrom, gap.chrom))
        {
        /* Chromosome changed: store the finished tree, start a new one. */
        setNGap(curChrom, chromHash, curTree);
        freeMem(curChrom);
        curChrom = NULL;
        nodeStack = lmAlloc(qLm, 256 * sizeof(nodeStack[0]));
        curTree = rbTreeNewDetailed(simpleRangeCmp, qLm, nodeStack);
        }
    if (curChrom == NULL)
        curChrom = cloneString(gap.chrom);

    lmAllocVar(curTree->lm, gapRange);
    gapRange->start = gap.chromStart;
    gapRange->end = gap.chromEnd;
    rbTreeAdd(curTree, gapRange);
    }

if (curChrom != NULL)
    {
    setNGap(curChrom, chromHash, curTree);
    freeMem(curChrom);
    }
sqlFreeResult(&sr);
}
예제 #18
0
struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom)
/* Query the gap table for chrom through hChromQuery() and return a tree
 * holding one chromStart..chromEnd range per gap row. */
{
struct rbTree *gapTree = rbTreeNew(simpleRangeCmp);
int binOffset;
struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &binOffset);
char **row;

for (row = sqlNextRow(sr);  row != NULL;  row = sqlNextRow(sr))
    {
    struct agpGap gap;
    struct simpleRange *gapRange;

    /* Skip any leading columns reported by hChromQuery() before loading. */
    agpGapStaticLoad(row+binOffset, &gap);
    lmAllocVar(gapTree->lm, gapRange);
    gapRange->start = gap.chromStart;
    gapRange->end = gap.chromEnd;
    rbTreeAdd(gapTree, gapRange);
    }
sqlFreeResult(&sr);
return gapTree;
}
void addRangeListAsRbTree(char *chrom, struct hash *chromHash,
			  struct range *rangeList, boolean doSort)
/* Turn rangeList into an rbTree and store it in chromHash under chrom.
 * The list is sorted by start first when doSort is set, then overlapping
 * ranges are merged, and each remaining range is added to the tree.
 * Aborts if chromHash already has an entry for chrom. */
{
struct rbTree *chromTree = rbTreeNew(rangeCmp);
struct range *cur;

if (hashLookup(chromHash, chrom) != NULL)
    errAbort("resultsToTrees: need results ordered by chrom, "
	     "but looks like they weren't for %s.", chrom);

if (doSort)
    slSort(&rangeList, rangeCmpStart);
mergeOverlaps(&rangeList);

cur = rangeList;
while (cur != NULL)
    {
    rbTreeAdd(chromTree, cur);
    cur = cur->next;
    }
hashAdd(chromHash, chrom, chromTree);
}
예제 #20
0
struct range *rangeTreeAddVal(struct rbTree *tree, int start, int end, void *val, void *(*mergeVals)(void *existingVal, void *newVal) )
/* Add start..end with value val to tree and return the range that ends up
 * in the tree.  Each existing range that rbTreeRemove() finds for the new
 * range is taken out and the new range is widened to cover it.  When
 * mergeVals is not NULL, it is called as mergeVals(existingVal, currentVal)
 * for each removed range and its result becomes the new range's value;
 * when it is NULL the value stays val. */
{
struct range *merged = lmAlloc(tree->lm, sizeof(*merged));  /* zeroed by lmAlloc */
struct range *old;

merged->start = start;
merged->end = end;
merged->val = val;

for (;;)
    {
    old = rbTreeRemove(tree, merged);
    if (old == NULL)
        break;
    if (old->start < merged->start)
        merged->start = old->start;
    if (old->end > merged->end)
        merged->end = old->end;
    if (mergeVals)
        merged->val = mergeVals(old->val, merged->val);
    }
rbTreeAdd(tree, merged);
return merged;
}
예제 #21
0
static struct edge *addUniqueEdge(struct rbTree *tree, struct vertex *start, struct vertex *end,
	struct linkedBeds *lb)
/* Return the edge from start to end, creating it in tree if matchingEdge()
 * finds none.  In either case a new piece of evidence that refers to lb and
 * spans the two vertex positions is pushed onto the edge's evidence list. */
{
struct evidence *ev;
struct edge *edge = matchingEdge(tree, start, end);

if (edge == NULL)
    {
    lmAllocVar(tree->lm, edge);
    edge->start = start;
    edge->end = end;
    edge->next = NULL;
    rbTreeAdd(tree, edge);
    }

/* Evidence lives in the tree's local memory, like the edge itself. */
lmAllocVar(tree->lm, ev);
ev->lb = lb;
ev->start = start->position;
ev->end = end->position;
slAddHead(&edge->evList, ev);
return edge;
}
예제 #22
0
void rangeTreeAddToCoverageDepth(struct rbTree *tree, int start, int end)
/* Add area from start to end to a tree that is being built up to store the
 * depth of coverage.  Recover coverage back out by looking at ptToInt(range->val)
 * on tree elements.
 *
 * The depth is kept as an integer packed into each range's val pointer:
 * new pieces get intToPt(1), and a piece that is covered again has its val
 * advanced by one.  Existing ranges are split in place so that each tree
 * element carries a single depth. */
{
/* Stack-allocated probe used only to look up a range matching start..end. */
struct range q;
q.start = start;
q.end = end;

struct range *r, *existing = rbTreeFind(tree, &q);
if (existing == NULL)
    {
    /* Nothing found for this area yet: add it as one piece of depth 1. */
    lmAllocVar(tree->lm, r);
    r->start = start;
    r->end = end;
    r->val = intToPt(1);
    rbTreeAdd(tree, r);
    }
else
    {
    if (existing->start <= start && existing->end >= end)
    /* The existing one completely encompasses us */
        {
	/* Make a new section for the bit before start. */
	if (existing->start < start)
	    {
	    lmAllocVar(tree->lm, r);
	    r->start = existing->start;
	    r->end = start;
	    r->val = existing->val;
	    /* Shrink existing before inserting the split-off piece. */
	    existing->start = start;
	    rbTreeAdd(tree, r);
	    }
	/* Make a new section for the bit after end. */
	if (existing->end > end)
	    {
	    lmAllocVar(tree->lm, r);
	    r->start = end;
	    r->end = existing->end;
	    r->val = existing->val;
	    /* Shrink existing before inserting the split-off piece. */
	    existing->end = end;
	    rbTreeAdd(tree, r);
	    }
	/* Increment existing section in overlapping area. */
        existing->val = (char *)(existing->val) + 1;
	}
    else
    /* In general case fetch list of regions that overlap us. 
       Remaining cases to handle are: 
	     r >> e     rrrrrrrrrrrrrrrrrrrr
			     eeeeeeeeee

	     e < r           rrrrrrrrrrrrrrr
			eeeeeeeeeeee

	     r < e      rrrrrrrrrrrr
			     eeeeeeeeeeeee
     */
        {
	struct range *existingList = rangeTreeAllOverlapping(tree, start, end);

#ifdef DEBUG
	/* Make sure that list is really sorted for debugging... */
	/* NOTE(review): lastStart is never updated inside the loop and the
	 * inner 'start' shadows the parameter, so this only checks each
	 * element against the first one -- confirm whether that is intended. */
	int lastStart = existingList->start;
	for (r = existingList; r != NULL; r = r->next)
	    {
	    int start = r->start;
	    if (start < lastStart)
	        internalErr();
	    }
#endif /* DEBUG */

	/* s tracks how far the new area has been accounted for so far. */
	int s = start, e = end;
	for (existing = existingList; existing != NULL; existing = existing->next)
	    {
	    /* Deal with start of new range that comes before existing */
	    if (s < existing->start)
	        {
		/* Uncovered stretch before existing becomes a piece of depth 1. */
		lmAllocVar(tree->lm, r);
		r->start = s;
		r->end = existing->start;
		r->val = intToPt(1);
		s = existing->start;
		rbTreeAdd(tree, r);
		}
	    else if (s > existing->start)
	        {
		/* Front of existing lies before the new area: split it off
		 * at its old depth and shrink existing to begin at s. */
		lmAllocVar(tree->lm, r);
		r->start = existing->start;
		r->end = s;
		r->val = existing->val;
		existing->start = s;
		rbTreeAdd(tree, r);
		}
	    /* NOTE(review): existing is incremented as a whole and is not
	     * split at e here, so a last range reaching past 'end' looks
	     * like it gets the extra depth beyond 'end' too -- confirm. */
	    existing->val = (char *)(existing->val) + 1;
	    s = existing->end;
	    }
	if (s < e)
	/* Deal with end of new range that doesn't overlap with anything. */
	    {
	    lmAllocVar(tree->lm, r);
	    r->start = s;
	    r->end = e;
	    r->val = intToPt(1);
	    rbTreeAdd(tree, r);
	    }
	}
    }

}
예제 #23
0
static void getRepeats(struct sqlConnection *conn, struct hash *arHash,
    char *chrom, struct rbTree **retAllRepeats,
	struct rbTree **retNewRepeats)
/* Load repeat ranges for chrom from the split table <chrom>_rmsk or, if that
 * does not exist, from the unsplit rmsk table; abort if neither exists.
 *   *retAllRepeats - tree of all repeat ranges, consecutive overlapping
 *                    rows merged into one range.
 *   *retNewRepeats - the same, restricted to rows whose
 *                    "repName.repClass.repFamily" key is present in arHash;
 *                    an empty tree when arHash is NULL. */
{
char *db = sqlGetDatabase(conn);
struct sqlResult *sr;
char **row;
struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
char tableName[64];
char query[256];
boolean splitRmsk = TRUE;
struct simpleRange *prevRange = NULL, *prevNewRange = NULL;

safef(tableName, sizeof(tableName), "%s_rmsk", chrom);
if (! sqlTableExists(conn, tableName))
    {
    safef(tableName, sizeof(tableName), "rmsk");
    if (! sqlTableExists(conn, tableName))
        errAbort("Can't find rmsk table for %s (%s.%s_rmsk or %s.rmsk)\n",
		 chrom, db, chrom, db);
    splitRmsk = FALSE;
    }
/* A split table holds one chromosome only, so it needs no where clause. */
if (splitRmsk)
    sqlSafef(query, sizeof query,
	    "select genoStart,genoEnd,repName,repClass,repFamily from %s",
	    tableName);
else
    sqlSafef(query, sizeof query,
	    "select genoStart,genoEnd,repName,repClass,repFamily from %s "
	    "where genoName = \"%s\"",
	    tableName, chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[0]);
    range->end = sqlUnsigned(row[1]);
    if (prevRange == NULL)
        prevRange = range;
    else if (overlap(range, prevRange))
        {
        /* Merge range into prevRange and drop it; prevRange carries on. */
        if (range->end > prevRange->end)
            prevRange->end = range->end;
        if (range->start < prevRange->start)
            prevRange->start = range->start;
        }
    else
        {
        rbTreeAdd(allTree, prevRange);
        prevRange = range;
        }

    if (arHash != NULL)
        {
        char arKey[512];
        /* Bounded formatting: the three name columns come straight from
         * the database and must not be able to overrun arKey. */
        safef(arKey, sizeof(arKey), "%s.%s.%s", row[2], row[3], row[4]);
        if (hashLookup(arHash, arKey))
            {
            lmAllocVar(newTree->lm, range);
            range->start = sqlUnsigned(row[0]);
            range->end = sqlUnsigned(row[1]);
            if (prevNewRange == NULL)
                prevNewRange = range;
            else if (overlap(range, prevNewRange))
                {
                /* Merge range into prevNewRange and drop it. */
                if (range->end > prevNewRange->end)
                    prevNewRange->end = range->end;
                if (range->start < prevNewRange->start)
                    prevNewRange->start = range->start;
                }
            else
                {
                /* Completed "new" ranges belong in newTree, the tree whose
                 * memory they were allocated from, not in allTree. */
                rbTreeAdd(newTree, prevNewRange);
                prevNewRange = range;
                }
            }
        }
    }
if (prevRange != NULL)
    rbTreeAdd(allTree, prevRange);
if (prevNewRange != NULL)
    rbTreeAdd(newTree, prevNewRange);
sqlFreeResult(&sr);
*retAllRepeats = allTree;
*retNewRepeats = newTree;
}
예제 #24
0
static void getRepeatsUnsplitTable(struct sqlConnection *conn,
	struct hash *chromHash, char *table)
/* Read chrom,chromStart,chromEnd for all chromosomes from table in a single
 * query ordered by chrom,chromStart.  For each chromosome, build a tree of
 * ranges (consecutive overlapping rows merged) and pass it to setRepeats()
 * along with chromHash.  Only coordinates are read, so nothing is ever put
 * in the "new repeats" tree: the same empty newTree is passed to
 * setRepeats() for every chromosome. */
{
struct sqlResult *sr;
char **row;
struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0]));
struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack);
char *prevChrom = NULL;
struct simpleRange *prevRange = NULL;  /* merged range not yet in allTree */
char query[256];

sqlSafef(query, ArraySize(query), "select chrom,chromStart,chromEnd from %s "
    "order by chrom,chromStart", table);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    if (prevChrom == NULL)
        prevChrom = cloneString(row[0]);
    else if (! sameString(prevChrom, row[0]))
        {
        /* Chromosome changed: flush the finished tree, start a new one. */
        rbTreeAdd(allTree, prevRange);
        setRepeats(prevChrom, chromHash, allTree, newTree);
        freeMem(prevChrom);
        prevRange = NULL;
        stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
        allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
        prevChrom = cloneString(row[0]);
        }
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[1]);
    range->end = sqlUnsigned(row[2]);
    if (prevRange == NULL)
        prevRange = range;
    else if (overlap(range, prevRange))
        {
        /* Merge range into prevRange and drop it; prevRange carries on. */
        if (range->end > prevRange->end)
            prevRange->end = range->end;
        if (range->start < prevRange->start)
            prevRange->start = range->start;
        }
    else
        {
        rbTreeAdd(allTree, prevRange);
        prevRange = range;
        }
    }
if (prevChrom != NULL)
    {
    rbTreeAdd(allTree, prevRange);
    setRepeats(prevChrom, chromHash, allTree, newTree);
    freeMem(prevChrom);
    }
sqlFreeResult(&sr);
}	/*	void getRepeatsUnsplitTable()	*/
예제 #25
0
static void getRepeatsUnsplit(struct sqlConnection *conn,
	struct hash *chromHash, struct hash *arHash)
/* Read the whole unsplit rmsk table with one query ordered by
 * genoName,genoStart -- one query for everything avoids a query per
 * scaffold on scaffold assemblies.  For each chromosome two trees are
 * handed to setRepeats() together with chromHash:
 *   all repeats - every range, consecutive overlapping rows merged;
 *   new repeats - only rows whose "repName.repClass.repFamily" key is
 *                 present in arHash.
 * When arHash is NULL the new-repeats tree is never filled and the same
 * empty tree is passed for every chromosome. */
{
struct sqlResult *sr;
char **row;
struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0]));
struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack);
char *prevChrom = NULL;
struct simpleRange *prevRange = NULL, *prevNewRange = NULL;

sr = sqlGetResult(conn,
    "NOSQLINJ select genoName,genoStart,genoEnd,repName,repClass,repFamily from rmsk "
    "order by genoName,genoStart");
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    if (prevChrom == NULL)
        prevChrom = cloneString(row[0]);
    else if (! sameString(prevChrom, row[0]))
        {
        /* Chromosome changed: flush both trees and start fresh ones. */
        rbTreeAdd(allTree, prevRange);
        if (prevNewRange != NULL)
            rbTreeAdd(newTree, prevNewRange);
        setRepeats(prevChrom, chromHash, allTree, newTree);
        freeMem(prevChrom);
        prevRange = prevNewRange = NULL;
        stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
        allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
        if (arHash != NULL)
            {
            newstack = lmAlloc(qLm, 256 * sizeof(newstack[0]));
            newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack);
            }
        prevChrom = cloneString(row[0]);
        }
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[1]);
    range->end = sqlUnsigned(row[2]);
    if (prevRange == NULL)
        prevRange = range;
    else if (overlap(range, prevRange))
        {
        /* Merge range into prevRange and drop it; prevRange carries on. */
        if (range->end > prevRange->end)
            prevRange->end = range->end;
        if (range->start < prevRange->start)
            prevRange->start = range->start;
        }
    else
        {
        rbTreeAdd(allTree, prevRange);
        prevRange = range;
        }

    if (arHash != NULL)
        {
        char arKey[512];
        /* Bounded formatting: the three name columns come straight from
         * the database and must not be able to overrun arKey.  A key too
         * long for the buffer is treated as not present in arHash. */
        int keyLen = snprintf(arKey, sizeof(arKey), "%s.%s.%s",
                              row[3], row[4], row[5]);
        if (keyLen >= 0 && keyLen < (int)sizeof(arKey)
            && hashLookup(arHash, arKey))
            {
            lmAllocVar(newTree->lm, range);
            range->start = sqlUnsigned(row[1]);
            range->end = sqlUnsigned(row[2]);
            if (prevNewRange == NULL)
                prevNewRange = range;
            else if (overlap(range, prevNewRange))
                {
                /* Merge range into prevNewRange and drop it. */
                if (range->end > prevNewRange->end)
                    prevNewRange->end = range->end;
                if (range->start < prevNewRange->start)
                    prevNewRange->start = range->start;
                }
            else
                {
                rbTreeAdd(newTree, prevNewRange);
                prevNewRange = range;
                }
            }
        }
    }
if (prevChrom != NULL)
    {
    rbTreeAdd(allTree, prevRange);
    if (prevNewRange != NULL)
        rbTreeAdd(newTree, prevNewRange);
    setRepeats(prevChrom, chromHash, allTree, newTree);
    freeMem(prevChrom);
    }
sqlFreeResult(&sr);
}
예제 #26
0
static void mergeDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree)
/* Merge together overlapping edges with soft ends.
 * Works in three passes over the edges of edgeTree:
 *   1. collect the spans of all edges running from a soft start to a soft
 *      end into a range tree, so overlapping ones coalesce into one range;
 *   2. move the evidence of each such edge onto its enclosing range and
 *      remove the edge from edgeTree;
 *   3. for each range, derive one consensus edge from the pooled evidence
 *      and add it to edgeTree when one could be made.
 * Finishes by removing vertices that are no longer used. */
{
struct mergedEdge
/* Hold together info on a merged edge. */
    {
    struct evidence *evidence;
    };

/* Traverse graph and build up range tree.  Each node in the range tree
 * will represent the bounds of coordinates of overlapping double softs */
struct rbTree *rangeTree = rangeTreeNew(0);
struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct vertex *start = edge->start;
    struct vertex *end = edge->end;
    if (start->type == ggSoftStart && end->type == ggSoftEnd)
        rangeTreeAdd(rangeTree, start->position, end->position);
    }

/* Traverse graph again merging edges */
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct vertex *start= edge->start;
    struct vertex *end = edge->end;
    if (start->type == ggSoftStart && end->type == ggSoftEnd)
        {
        struct range *r = rangeTreeFindEnclosing(rangeTree,
		start->position, end->position);
        assert(r != NULL);
        /* At this point, r represents the bounds of a double-soft
         * region that encompasses this edge.  Collect the set of
         * evidence of edges overlapping this range */
        struct mergedEdge *mergeEdge = r->val;
        if (mergeEdge == NULL)
            {
            lmAllocVar(rangeTree->lm, mergeEdge);
            r->val = mergeEdge;
            }
        mergeEdge->evidence = slCat(edge->evList, mergeEdge->evidence);
        verbose(3, "Merging doubly-soft edge (%d,%d) into range (%d,%d)\n", 
		start->position, end->position, r->start, r->end);
        edge->evList = NULL;
        rbTreeRemove(edgeTree, edge);
        }
    }

/* Traverse merged edge list, making a single edge from each range. At this point,
 * each range will have some evidence attached to it, from each of the double softs
 * that fall within the range.  From all of this evidence, make a single consensus edge */
struct range *r;
struct lm *lm = lmInit(0);
for (r = rangeTreeList(rangeTree); r != NULL; r = r->next)
    {
    struct mergedEdge *mergedEdge = r->val;
    struct edge *edge = edgeFromConsensusOfEvidence(vertexTree, mergedEdge->evidence, lm);
    /* No consensus edge may come back; only touch edge when there is one,
     * including in the log message. */
    if (edge != NULL)
        {
        rbTreeAdd(edgeTree, edge);
        verbose(3, "Deriving edge (%d,%d) from all the double softs in range (%d,%d)\n", 
	    edge->start->position, edge->end->position, r->start, r->end);
        }
    }


/* Clean up and go home. */
lmCleanup(&lm);
removeUnusedVertices(vertexTree, edgeTree);
slFreeList(&edgeRefList);
rbTreeFree(&rangeTree);
}