struct hash *resultsToTreesMergeInline(struct sqlResult *sr)
/* Given results of a sorted query on chrom,chromStart,chromEnd, store results 
 * as rbTrees hashed by chrom. */
{
struct hash *chromHash = newHash(18);
char **row = NULL;
struct rbTree *t = rbTreeNew(rangeCmp);
char *prevChrom = NULL;
struct range *prevR = NULL;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct range *r = NULL;
    AllocVar(r);
    r->start = sqlUnsigned(row[1]);
    r->end   = sqlUnsigned(row[2]);
    if (prevChrom == NULL)
	prevChrom = cloneString(row[0]);
    else if (! sameString(prevChrom, row[0]))
	{
	rbTreeAdd(t, prevR);
	addRbTree(prevChrom, chromHash, t);
	prevR = NULL;
	freeMem(prevChrom);
	prevChrom = cloneString(row[0]);
	t = rbTreeNew(rangeCmp);
	}
    if (prevR == NULL)
	prevR = r;
    else if (r->start <= prevR->end && prevR->start <= r->end)
	{
	/* Overlap: merge r into prevR & discard; prevR gets passed forward. */
	if (r->end > prevR->end)
	    prevR->end = r->end;
	if (r->start < prevR->start)
	    prevR->start = r->start;
	freez(&r);
	}
    else
	{
	rbTreeAdd(t, prevR);
	prevR = r;
	}
    }
if (prevChrom != NULL)
    {
    rbTreeAdd(t, prevR);
    addRbTree(prevChrom, chromHash, t);
    freeMem(prevChrom);
    }
return chromHash;
}
Exemplo n.º 2
0
static struct rbTree *getNewRepeats(char *dirName, char *chrom)
/* Read the repeatMasker .out line format file <dirName>/<chrom>.out.spec
 * into a tree of ranges and return the tree.
 * Handles lineage-specific files that preserve header: leading lines whose
 * first word does not parse to a non-zero number are skipped.
 * Aborts if the file name does not fit the local buffer or if word 5 of a
 * data line is not chrom. */
{
struct rbTree *tree = rbTreeNew(simpleRangeCmp);
struct simpleRange *range;
char fileName[512];
struct lineFile *lf;
char *row[7];
boolean headerDone = FALSE;
int nameLen;

/* Bounded formatting: a long dirName/chrom must not overrun fileName. */
nameLen = snprintf(fileName, sizeof(fileName), "%s/%s.out.spec", dirName, chrom);
if (nameLen < 0 || nameLen >= (int)sizeof(fileName))
    errAbort("File name too long: %s/%s.out.spec", dirName, chrom);
lf = lineFileOpen(fileName, TRUE);
while (lineFileRow(lf, row))
    {
    /* skip header lines (don't contain numeric first field) */
    if (!headerDone && atoi(row[0]) == 0)
        continue;
    if (!sameString(chrom, row[4]))
        errAbort("Expecting %s word 5, line %d of %s\n", 
		chrom, lf->lineIx, lf->fileName);
    /* Once a data line has been seen, no later line is treated as header. */
    headerDone = TRUE;
    lmAllocVar(tree->lm, range);
    /* Start is stored one less than the value in the file (presumably
     * converting a 1-based start to 0-based - confirm against the format). */
    range->start = lineFileNeedNum(lf, row, 5) - 1;
    range->end = lineFileNeedNum(lf, row, 6);
    rbTreeAdd(tree, range);
    }
lineFileClose(&lf);
return tree;
}
Exemplo n.º 3
0
static struct rbTree *makeVertexTree(struct linkedBeds *lbList)
/* Build and return a tree with one vertex for each distinct vertex
 * contributed by the beds of every element of lbList. */
{
struct rbTree *vertexTree = rbTreeNew(vertexCmp);
struct linkedBeds *lb = lbList;
while (lb != NULL)
    {
    struct bed *bed = lb->bedList;
    while (bed != NULL)
        {
        /* The outermost boundaries of a bed are soft. */
        addUniqueVertex(vertexTree, bed->chromStart, ggSoftStart);
        addUniqueVertex(vertexTree, bed->chromEnd, ggSoftEnd);

        /* Between each pair of consecutive blocks: a hard end where the
         * earlier block stops, then a hard start where the next begins. */
        int gapCount = bed->blockCount-1;
        int blockIx = 0;
        while (blockIx < gapCount)
            {
            addUniqueVertex(vertexTree,
                bed->chromStart + bed->chromStarts[blockIx] + bed->blockSizes[blockIx],
                ggHardEnd);
            addUniqueVertex(vertexTree,
                bed->chromStart + bed->chromStarts[blockIx+1],
                ggHardStart);
            ++blockIx;
            }
        bed = bed->next;
        }
    lb = lb->next;
    }
return vertexTree;
}
Exemplo n.º 4
0
static void getRepeatsTable(struct sqlConnection *conn, char *table,
    char *chrom, struct rbTree **retAllRepeats,
	struct rbTree **retNewRepeats)
/* Build a tree of merged chromStart,chromEnd ranges from the rows of the
 *	specified table that are on chrom, and return it in *retAllRepeats.
 *	*retNewRepeats is set to a new tree that this function never adds
 *	anything to. */
{
struct sqlResult *sr;
char **row;
struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
char query[256];
struct simpleRange *prevRange = NULL;	/* Pending range, not yet in tree. */

sqlSafef(query, ArraySize(query), "select chromStart,chromEnd from %s "
	    "where chrom = \"%s\"", table, chrom);
sr = sqlGetResult(conn, query);
/* NOTE(review): a row is only merged with the pending range just before
 * it, and the query has no ORDER BY - presumably rows are expected to
 * arrive in start order; confirm. */
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[0]);
    range->end = sqlUnsigned(row[1]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge range into prevRange; prevRange gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	/* No overlap: prevRange is final, range becomes pending. */
	rbTreeAdd(allTree, prevRange);
	prevRange = range;
	}
    }
/* Flush the last pending range, if any rows were seen. */
if (prevRange != NULL)
    rbTreeAdd(allTree, prevRange);
sqlFreeResult(&sr);
*retAllRepeats = allTree;
*retNewRepeats = newTree;
}	/*	static void getRepeatsTable()	*/
static struct visiSearcher *visiSearcherNew(int wordCount)
/* Create a new, empty search structure. */
{
struct visiSearcher *searcher;
AllocVar(searcher);
searcher->tree = rbTreeNew(visiMatchCmpImageId);
searcher->wordCount = wordCount;
return searcher;
}
struct rbTree *wigIntoRangeTree(char *fileName)
/* Read wiggle sections from fileName until no more are returned and hand
 * back a tree (compared with bedRangeCmp) containing them. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct rbTree *sectionTree = rbTreeNew(bedRangeCmp);
    struct wigSection *sec;

    /* NOTE(review): lf is never closed in this function - confirm whether
     * the sections still refer to it before adding a close. */
    while ((sec = wigSectionRead(lf)) != NULL)
    {
        rbTreeAdd(sectionTree, sec);
    }
    return sectionTree;
}
Exemplo n.º 7
0
struct rbTree *rbTreeFromNetFile(char *fileName)
/* Build an rbTree from a net file.  A single chainNetRead call is made, and
 * each element of the resulting net's fillList (following the next links
 * only) is added to the tree.  Returns an empty tree if no net was read. */
{
struct rbTree *rbTree = rbTreeNew(cnFillRangeCmp);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chainNet *cn = chainNetRead(lf);
struct cnFill *fill = NULL;
/* Guard against a NULL net (e.g. an empty file) instead of dereferencing
 * it. */
if (cn != NULL)
    {
    for(fill=cn->fillList; fill != NULL; fill = fill->next)
        {
        rbTreeAdd(rbTree, fill);
        }
    }
/* NOTE(review): lf is never closed in this function. */
return rbTree;
}
boolean isUniqueCoordAndAgx(char *db, struct intronEv *iv, struct hash *posHash, struct hash *agxHash)
/** Return TRUE only if iv passes every uniqueness check below and
   checkMgcPicks(db, iv) is false; return FALSE otherwise.
   Side effects happen regardless of the result: a location found unique is
   added to a tree that persists across calls, missing names are added to
   agxHash, and iv->ev->orthoBedName has its suffix chopped in place.
   posHash is not used by this function. */
{
static struct rbTree *seenBeds = NULL;	/* Kept between calls. */
static char nameBuf[1024];
boolean isNew = TRUE;

if(seenBeds == NULL)
    seenBeds = rbTreeNew(bedRangeCmp);

/* Unique location (don't pick the same intron twice). */
if(!bedUniqueInTree(seenBeds, iv))
    isNew = FALSE;
else
    {
    struct bed *span = NULL;
    AllocVar(span);
    span->chrom = cloneString(iv->chrom);
    span->chromStart = iv->e1S;
    span->chromEnd = iv->e2E;
    rbTreeAdd(seenBeds, span);
    }

/* Unique loci, don't pick from same overall loci if possible. */
safef(nameBuf, sizeof(nameBuf), "%s", iv->agxName);
if(hashFindVal(agxHash, nameBuf) != NULL)
    isNew = FALSE;
else
    hashAdd(agxHash, nameBuf, iv);

/* Definitely don't pick from same mRNA.  The chopped name is looked up in
 * the same hash as the agx names. */
chopSuffix(iv->ev->orthoBedName);
safef(nameBuf, sizeof(nameBuf), "%s", iv->ev->orthoBedName);
if(hashFindVal(agxHash, nameBuf) != NULL)
    isNew = FALSE;
else
    hashAdd(agxHash, nameBuf, iv);

/* checkMgcPicks is only consulted when everything above passed. */
return isNew && !checkMgcPicks(db, iv);
}
Exemplo n.º 9
0
static struct rbTree *makeEdgeTree(struct linkedBeds *lbList, struct rbTree *vertexTree)
/* Build and return a tree of unique edges between vertices looked up in
 * vertexTree: for each bed an edge per exon and per intron, plus an edge
 * joining each bed to the bed after it in the same linkedBeds. */
{
struct rbTree *edgeTree = rbTreeNew(edgeCmp);
struct linkedBeds *lb = lbList;
while (lb != NULL)
    {
    struct bed *bed = lb->bedList;
    while (bed != NULL)
        {
        struct bed *following = bed->next;
        struct vertex *from = matchingVertex(vertexTree, bed->chromStart, ggSoftStart);
        struct vertex *to;
        int intronCount = bed->blockCount-1;
        int ix;

        /* Every exon but the last, each followed by its intron. */
        for (ix = 0; ix < intronCount; ++ix)
            {
            /* Exon: from the current start to the block's hard end. */
            to = matchingVertex(vertexTree,
                from->position + bed->blockSizes[ix], ggHardEnd);
            addUniqueEdge(edgeTree, from, to, lb);

            /* Intron: from that hard end to the next block's hard start. */
            from = matchingVertex(vertexTree,
                bed->chromStart + bed->chromStarts[ix+1], ggHardStart);
            addUniqueEdge(edgeTree, to, from, lb);
            }

        /* Last exon runs to the soft end of the bed. */
        to = matchingVertex(vertexTree, bed->chromEnd, ggSoftEnd);
        addUniqueEdge(edgeTree, from, to, lb);

        /* Join this bed to the one after it, when there is one. */
        if (following != NULL)
            {
            from = matchingVertex(vertexTree, following->chromStart, ggSoftStart);
            addUniqueEdge(edgeTree, to, from, lb);
            }
        bed = following;
        }
    lb = lb->next;
    }
return edgeTree;
}
Exemplo n.º 10
0
void rbTest(int count)
/* Exercise an rbTree: add the integers 0 to count-1, look every one of
 * them up ten times over (aborting if a lookup fails), then free the
 * tree. */
{
    struct rbTree *tree = rbTreeNew(rbTreeCmpInt);
    int value, pass;

    /* Storage for each item is allocated from tree->lm. */
    for (value = 0; value < count; ++value)
    {
        int *slot;
        lmAllocVar(tree->lm, slot);
        *slot = value;
        rbTreeAdd(tree, slot);
    }

    pass = 0;
    while (pass < 10)
    {
        for (value = 0; value < count; ++value)
        {
            if (!rbTreeFind(tree, &value))
                errAbort("Couldnt' find %d", value);
        }
        ++pass;
    }
    rbTreeFree(&tree);
}
Exemplo n.º 11
0
struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom)
/* Return a tree of ranges for sequence gaps in chromosome */
{
struct rbTree *tree = rbTreeNew(simpleRangeCmp);
int rowOffset;
struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset);
char **row;

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct agpGap gap;
    struct simpleRange *range;
    agpGapStaticLoad(row+rowOffset, &gap);
    lmAllocVar(tree->lm, range);
    range->start = gap.chromStart;
    range->end = gap.chromEnd;
    rbTreeAdd(tree, range);
    }
sqlFreeResult(&sr);
return tree;
}
void addRangeListAsRbTree(char *chrom, struct hash *chromHash,
			  struct range *rangeList, boolean doSort)
/* Turn the list of ranges for chrom into an rbTree and store it in
 * chromHash under chrom.  The list is sorted first when doSort is set,
 * then passed through mergeOverlaps, and each remaining list element is
 * added to the tree.  Aborts if chromHash already has an entry for chrom. */
{
struct rbTree *chromTree = rbTreeNew(rangeCmp);
struct hashEl *existing = hashLookup(chromHash, chrom);
struct range *cur = NULL;

/* A chrom that is already present means the input was not grouped. */
if (existing != NULL)
    errAbort("resultsToTrees: need results ordered by chrom, "
	     "but looks like they weren't for %s.", chrom);
if (doSort)
    slSort(&rangeList, rangeCmpStart);
mergeOverlaps(&rangeList);
cur = rangeList;
while (cur != NULL)
    {
    rbTreeAdd(chromTree, cur);
    cur = cur->next;
    }
hashAdd(chromHash, chrom, chromTree);
}
Exemplo n.º 13
0
struct rbTree *getTrf(struct sqlConnection *conn, char *chrom)
/* Query simpleRepeat for the rows on chrom and return a tree of their
 * chromStart,chromEnd ranges, with each row that overlaps the pending
 * range folded into it. */
{
struct rbTree *trfTree = rbTreeNew(simpleRangeCmp);
struct simpleRange *pending = NULL;	/* Range not yet added to the tree. */
char query[256];
struct sqlResult *sr;
char **row;

sqlSafef(query, sizeof query, "select chromStart,chromEnd from simpleRepeat "
               "where chrom = '%s'",
	       chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *cur;
    lmAllocVar(trfTree->lm, cur);
    cur->start = sqlUnsigned(row[0]);
    cur->end = sqlUnsigned(row[1]);

    /* Very first row: nothing to compare against yet. */
    if (pending == NULL)
        {
        pending = cur;
        continue;
        }

    /* Disjoint from the pending range: emit it and carry cur forward. */
    if (!overlap(cur, pending))
        {
        rbTreeAdd(trfTree, pending);
        pending = cur;
        continue;
        }

    /* Overlapping: widen the pending range to cover cur as well. */
    if (cur->end > pending->end)
        pending->end = cur->end;
    if (cur->start < pending->start)
        pending->start = cur->start;
    }
/* Emit whatever is still pending after the last row. */
if (pending != NULL)
    rbTreeAdd(trfTree, pending);
sqlFreeResult(&sr);
return trfTree;
}
Exemplo n.º 14
0
static void getRepeats(struct sqlConnection *conn, struct hash *arHash,
    char *chrom, struct rbTree **retAllRepeats,
	struct rbTree **retNewRepeats)
/* Return trees of merged genoStart,genoEnd ranges for the repeats on chrom.
 * Rows come from the <chrom>_rmsk table if it exists, otherwise from rmsk
 * restricted to chrom; aborts if neither table exists.
 * *retAllRepeats gets the ranges of every row.  *retNewRepeats gets the
 * ranges of only those rows whose "repName.repClass.repFamily" key is found
 * in arHash, and is left empty when arHash is NULL. */
{
char *db = sqlGetDatabase(conn);
struct sqlResult *sr;
char **row;
struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
char tableName[64];
char query[256];
boolean splitRmsk = TRUE;
/* Pending ranges that have not been added to their tree yet. */
struct simpleRange *prevRange = NULL, *prevNewRange = NULL;

safef(tableName, sizeof(tableName), "%s_rmsk", chrom);
if (! sqlTableExists(conn, tableName))
    {
    safef(tableName, sizeof(tableName), "rmsk");
    if (! sqlTableExists(conn, tableName))
	errAbort("Can't find rmsk table for %s (%s.%s_rmsk or %s.rmsk)\n",
		 chrom, db, chrom, db);
    splitRmsk = FALSE;
    }
if (splitRmsk)
    sqlSafef(query, sizeof query,
	    "select genoStart,genoEnd,repName,repClass,repFamily from %s",
	    tableName);
else
    sqlSafef(query, sizeof query,
	    "select genoStart,genoEnd,repName,repClass,repFamily from %s "
	    "where genoName = \"%s\"",
	    tableName, chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[0]);
    range->end = sqlUnsigned(row[1]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge range into prevRange; prevRange gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	rbTreeAdd(allTree, prevRange);
	prevRange = range;
	}
    if (arHash != NULL)
	{
	char arKey[512];
	/* Bounded formatting so long repeat names cannot overrun arKey. */
	safef(arKey, sizeof(arKey), "%s.%s.%s", row[2], row[3], row[4]);
	if (hashLookup(arHash, arKey))
	    {
	    /* Same merging as above, but kept in its own tree. */
	    struct simpleRange *newRange;
	    lmAllocVar(newTree->lm, newRange);
	    newRange->start = sqlUnsigned(row[0]);
	    newRange->end = sqlUnsigned(row[1]);
	    if (prevNewRange == NULL)
		prevNewRange = newRange;
	    else if (overlap(newRange, prevNewRange))
		{
		if (newRange->end > prevNewRange->end)
		    prevNewRange->end = newRange->end;
		if (newRange->start < prevNewRange->start)
		    prevNewRange->start = newRange->start;
		}
	    else
		{
		/* Goes into newTree: it is allocated from newTree->lm and
		 * must not end up in allTree. */
		rbTreeAdd(newTree, prevNewRange);
		prevNewRange = newRange;
		}
	    }
	}
    }
/* Flush whatever is still pending for each tree. */
if (prevRange != NULL)
    rbTreeAdd(allTree, prevRange);
if (prevNewRange != NULL)
    rbTreeAdd(newTree, prevNewRange);
sqlFreeResult(&sr);
*retAllRepeats = allTree;
*retNewRepeats = newTree;
}
Exemplo n.º 15
0
struct rbTree *rangeTreeNew()
/* Make and return a new rbTree that compares its items with rangeCmp. */
{
struct rbTree *tree = rbTreeNew(rangeCmp);
return tree;
}