void doOneChrom(char *database, char *chrom, char *rnaTable, char *expTable, FILE *f)
/* Process one chromosome. */
{
int chromSize = hChromSize(database, chrom);
struct binKeeper *bk = binKeeperNew(0, chromSize);
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr;
char **row;
struct bed *exp, *rna;
int rowOffset;
struct binElement *be, *beList;
int oneCount;

/* Load up expTable into bin-keeper. */
sr = hChromQuery(conn, expTable, chrom, NULL, &rowOffset);
while ((row = sqlNextRow(sr)) != NULL)
    {
    exp = bedLoadN(row + rowOffset, 12);
    binKeeperAdd(bk, exp->chromStart, exp->chromEnd, exp);
    }
sqlFreeResult(&sr);

/* Loop through rnaTable and look at intersections. */
sr = hChromQuery(conn, rnaTable, chrom, NULL, &rowOffset);
while ((row = sqlNextRow(sr)) != NULL)
    {
    rna = bedLoadN(row + rowOffset, 12);
    beList = binKeeperFind(bk, rna->chromStart, rna->chromEnd);
    oneCount = 0;
    for (be = beList; be != NULL; be = be->next)
        {
	exp = be->val;
	if (exp->strand[0] == rna->strand[0])
	    {
	    ++oneCount;
	    ++hitCount;
//	    fprintf(f, "%s:%d-%d\t%s\t%s\n", 
//	    	rna->chrom, rna->chromStart, rna->chromEnd, rna->name, exp->name);
	    }
	}
    slFreeList(&beList);
    if (oneCount == 0)
	{
        ++missCount;
	fprintf(f, "miss %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name);
	}
    else if (oneCount == 1)
	{
	fprintf(f, "uniq %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name);
        ++uniqCount;
	}
    else
	{
	fprintf(f, "dupe %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name);
        ++dupeCount;
	}
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
}
Exemple #2
0
void getChromSizes(char *database, struct hash **retHash, 
	struct chromSizes **retList)
/* Return hash of chromSizes.  Also calculates size without
 * gaps. */
{
struct sqlConnection *conn = hAllocConn(database);
struct chromInfo *ci, *ciList = getAllChromInfo(database);
struct sqlResult *sr;
char **row;
struct chromSizes *cs, *csList = NULL;
struct hash *hash = newHash(8);
int rowOffset;

for (ci = ciList; ci != NULL; ci = ci->next)
    {
    AllocVar(cs);
    hashAddSaveName(hash, ci->chrom, cs, &cs->name);
    slAddHead(&csList, cs);
    cs->totalSize = ci->size;
    sr = hChromQuery(conn, "gold", ci->chrom, NULL, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
        {
	struct agpFrag frag;
	agpFragStaticLoad(row + rowOffset, &frag);
	cs->seqSize += frag.chromEnd - frag.chromStart;
	}
    sqlFreeResult(&sr);
    }
hFreeConn(&conn);
slReverse(&csList);
*retHash = hash;
*retList = csList;
}
void scanChromTable(struct sqlConnection *conn, char *chrom, char *table)
/* Scan chromosome table, don't do anything with data. */
{
struct sqlResult *sr;
int rowOffset;
char **row;

sr = hChromQuery(conn, table, chrom, NULL, &rowOffset);
while ((row = sqlNextRow(sr)) != NULL)
    ;
sqlFreeResult(&sr);
}
void bestProbeOverlap(struct sqlConnection *conn, char *probeTable, 
	struct genePred *gpList, struct hash *gpToProbeHash)
/* Create hash of most overlapping probe if any for each gene. Require
 * at least 100 base overlap. */
{
/* Create a hash of binKeepers filled with probes. */
struct hash *keeperHash = keepersForChroms(conn);
struct hashCookie it = hashFirst(keeperHash);
struct hashEl *hel;
int pslCount = 0;
while ((hel = hashNext(&it)) != NULL)
    {
    char *chrom = hel->name;
    struct binKeeper *bk = hel->val;
    int rowOffset;
    struct sqlResult *sr = hChromQuery(conn, probeTable, chrom, NULL, &rowOffset);
    char **row;
    while ((row = sqlNextRow(sr)) != NULL)
        {
	struct psl *psl = pslLoad(row+rowOffset);
	binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
	++pslCount;
	}
    sqlFreeResult(&sr);
    }
verbose(2, "Loaded %d psls from %s\n", pslCount, probeTable);

/* Loop through gene list, finding best probe if any for each gene. */
struct genePred *gp;
for (gp = gpList; gp != NULL; gp = gp->next)
    {
    struct rbTree *rangeTree = genePredToRangeTree(gp, FALSE);
    struct psl *bestPsl = NULL;
    int bestOverlap = 99;	/* MinOverlap - 1 */
    struct binKeeper *bk = hashMustFindVal(keeperHash, gp->chrom);
    struct binElement *bin, *binList = binKeeperFind(bk, gp->txStart, gp->txEnd);
    for (bin = binList; bin != NULL; bin = bin->next)
        {
	struct psl *psl = bin->val;
	if (psl->strand[0] == gp->strand[0])
	    {
	    int overlap = pslRangeTreeOverlap(psl, rangeTree);
	    if (overlap > bestOverlap)
		{
		bestOverlap = overlap;
		bestPsl = psl;
		}
	    }
	}
    if (bestPsl != NULL)
        hashAdd(gpToProbeHash, gp->name, bestPsl->qName);
    }
}
Exemple #5
0
void restrictGaps(char *database, UBYTE *cov, int size, char *chrom)
/* Mark gaps as off-limits. */
{
int rowOffset;
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset);
char **row;
int s,e;

while ((row = sqlNextRow(sr)) != NULL)
    {
    s = sqlUnsigned(row[1+rowOffset]);
    e = sqlUnsigned(row[2+rowOffset]);
    assert(s >= 0);
    assert(e <= size);
    memset(cov + s, restricted, e - s);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
}
Exemple #6
0
struct pslReader *pslReaderChromQuery(struct sqlConnection* conn,
                                      char* table, char* chrom,
                                      char* extraWhere)
/* Create a new pslReader to read all rows for a chrom in a database table.
 * If extraWhere is not null, it is added as an additional where condition. It
 * will determine if pslx columns are in the table. */
{
struct pslReader* pr;
int rowOffset;
AllocVar(pr);
pr->table = cloneString(table);

/* non-existant table will return null */
pr->sr = hChromQuery(conn, table, chrom, extraWhere, &rowOffset);
if (pr->sr != NULL)
    getTableInfo(pr);

assert(pr->rowOffset == rowOffset);
return pr;
}
Exemple #7
0
struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom)
/* Return a tree of ranges for sequence gaps in chromosome */
{
struct rbTree *tree = rbTreeNew(simpleRangeCmp);
int rowOffset;
struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset);
char **row;

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct agpGap gap;
    struct simpleRange *range;
    agpGapStaticLoad(row+rowOffset, &gap);
    lmAllocVar(tree->lm, range);
    range->start = gap.chromStart;
    range->end = gap.chromEnd;
    rbTreeAdd(tree, range);
    }
sqlFreeResult(&sr);
return tree;
}
struct genePred *loadGenePred(char *database, char *chrom, char *track, struct binKeeper *bk)
/* Load in a gene prediction track to bk. */
{
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr;
char **row;
int rowOffset;
struct genePred *list = NULL, *el;

sr = hChromQuery(conn, track, chrom, NULL, &rowOffset);
while ((row = sqlNextRow(sr)) != NULL)
    {
    el = genePredLoad(row + rowOffset);
    binKeeperAdd(bk, el->txStart, el->txEnd, el);
    slAddHead(&list, el);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
slReverse(&list);
return list;
}
Exemple #9
0
int countBases(struct sqlConnection *conn, char *chrom, int chromSize,
    char *database)
/* Count bases, generally not including gaps, in chromosome. */
{
static boolean gapsLoaded = FALSE;
struct sqlResult *sr;
int totalGaps = 0;
char **row;
int rowOffset;

if (countGaps)
    return chromSize;

/*	If doing all chroms, then load up all the gaps and be done with
 *	it instead of re-reading the gap table for every chrom
 */
if (sameWord(clChrom,"all"))
    {
    if (!gapsLoaded)
	gapHash = loadAllGaps(conn, database);
    gapsLoaded = TRUE;
    totalGaps = hashIntValDefault(gapHash, chrom, 0);
    }
else
    {
    sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
	{
	int gapSize;
	struct agpGap gap;
	agpGapStaticLoad(row+rowOffset, &gap);
	gapSize = gap.chromEnd - gap.chromStart;
	totalGaps += gapSize;
	}
    sqlFreeResult(&sr);
    }
return chromSize - totalGaps;
}
Exemple #10
0
void loadPslsFromDatabase(struct sqlConnection *conn, char *db, char *chrom) 
/** Load all of the desired alignments into the chromkeeper structure
    from the desired pslTables. */
{
int i = 0;
struct sqlResult *sr = NULL;
char **row = NULL;
int rowOffset = 0;
struct psl *pslList = NULL, *psl = NULL;
for(i = 0; i < numDbTables; i++)
    {
    sr = hChromQuery(conn, dbTables[i], chrom, NULL, &rowOffset); 
    while((row = sqlNextRow(sr)) != NULL)
	{
	psl = pslLoad(row+rowOffset);
	slAddHead(&pslList, psl);
	minPslStart = min(psl->tStart, minPslStart);
	maxPslEnd = max(psl->tEnd, maxPslEnd);
	/* This just adds the mrna twice to the list, cheat way to add more
	   weight to certain tables. */
	if(weightMrna && (stringIn("refSeqAli", dbTables[i]) || stringIn("mrna", dbTables[i])))
	    {
	    psl = clonePsl(psl);
	    slAddHead(&pslList, psl);
	    }
	}
    sqlFreeResult(&sr);
    }

chromPslBin = binKeeperNew(minPslStart, maxPslEnd);
agxSeenBin = binKeeperNew(minPslStart, maxPslEnd);
for(psl = pslList; psl != NULL; psl = psl->next)
    {
    binKeeperAdd(chromPslBin, psl->tStart, psl->tEnd, psl);
    }
}
Exemple #11
0
static struct hash *loadAllGaps(struct sqlConnection *conn, char *db)
/*	working on all chroms, fetch all per-chrom gap counts at once
 *	returns hash by chrom name to gap counts for that chrom
 */
{ 
struct chromInfo *cInfo;
struct sqlResult *sr;
char **row;
struct hash *ret;
int totalGapSize = 0;
int gapCount = 0;

ret = newHash(0);

/*	If not split, read in whole gulp, create per-chrom hash of sizes */
if (hTableExists(db, "gap"))
    {
    char *prevChrom = NULL;
    int totalGapsThisChrom = 0;
    
    sr = sqlGetResult(conn,
	NOSQLINJ "select chrom,chromStart,chromEnd from gap order by chrom");
    while ((row = sqlNextRow(sr)) != NULL)
	{
	int gapSize = sqlUnsigned(row[2]) - sqlUnsigned(row[1]);
	++gapCount;
	if (prevChrom && sameWord(prevChrom,row[0]))
	    {
	    totalGapsThisChrom += gapSize;
	    totalGapSize += gapSize;
	    }
	else
	    {
	    if (prevChrom)
		{
		hashAddInt(ret, prevChrom, totalGapsThisChrom);
		freeMem(prevChrom);
		prevChrom = cloneString(row[0]);
		totalGapsThisChrom = gapSize;
		totalGapSize += gapSize;
		}
	    else
		{
		prevChrom = cloneString(row[0]);
		totalGapsThisChrom = gapSize;
		totalGapSize += gapSize;
		}
	    }
	}
	/*	and the last one	*/
	if (prevChrom && (totalGapsThisChrom > 0))
	    {
	    hashAddInt(ret, prevChrom, totalGapsThisChrom);
	    freeMem(prevChrom);
	    }
    sqlFreeResult(&sr);
    }
else
    {
    /*	for each chrom name, fetch the gap count	*/
    for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
	{
	int rowOffset;
	int totalGapsThisChrom = 0;
	sr = hChromQuery(conn, "gap", cInfo->chrom, NULL, &rowOffset);
	while ((row = sqlNextRow(sr)) != NULL)
	    {
	    int gapSize;
	    struct agpGap gap;
	    ++gapCount;
	    agpGapStaticLoad(row+rowOffset, &gap);
	    gapSize = gap.chromEnd - gap.chromStart;
	    totalGapsThisChrom += gapSize;
	    totalGapSize += gapSize;
	    }
	sqlFreeResult(&sr);
	hashAddInt(ret, cInfo->chrom, totalGapsThisChrom);
	}
    }
verbose(2,"#\tloaded %d gaps covering %d bases\n", gapCount, totalGapSize);
return ret;
}
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack,
              struct hash *otherHash, struct stats *stats)
/* Process one chromosome. */
{
    struct bed *bedList = NULL, *bed;
    struct sqlConnection *conn = hAllocConn(database);
    struct sqlResult *sr;
    char **row;
    int rowOffset;
    int chromSize = hChromSize(database, chrom);
    struct binKeeper *bk = binKeeperNew(0, chromSize);
    struct psl *pslList = NULL;
    struct dnaSeq *chromSeq = NULL;

    if (endsWith(bedTrack, ".bed"))
    {
        struct lineFile *lf = lineFileOpen(bedTrack, TRUE);
        char *row[3];
        while (lineFileRow(lf, row))
        {
            if (sameString(chrom, row[0]))
            {
                bed = bedLoad3(row);
                slAddHead(&bedList, bed);
            }
        }
        lineFileClose(&lf);
    }
    else
    {
        sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset);
        while ((row = sqlNextRow(sr)) != NULL)
        {
            bed = bedLoad3(row+rowOffset);
            slAddHead(&bedList, bed);
        }
        sqlFreeResult(&sr);
    }
    slReverse(&bedList);
    uglyf("Loaded beds\n");

    sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        struct psl *psl = pslLoad(row + rowOffset);
        slAddHead(&pslList, psl);
        binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
    }
    sqlFreeResult(&sr);
    uglyf("Loaded psls\n");

    chromSeq = hLoadChrom(database, chrom);
    /* Fetch entire chromosome into memory. */
    uglyf("Loaded human seq\n");

    for (bed = bedList; bed != NULL; bed = bed->next)
    {
        struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd);
        for (el = list; el != NULL; el = el->next)
        {
            struct psl *fullPsl = el->val;
            struct psl *psl = pslTrimToTargetRange(fullPsl,
                                                   bed->chromStart, bed->chromEnd);
            if (psl != NULL)
            {
                foldPslIntoStats(psl, chromSeq, otherHash, stats);
                pslFree(&psl);
            }
        }
        slFreeList(&list);
        stats->bedCount += 1;
        stats->bedBaseCount += bed->chromEnd - bed->chromStart;
        sqlFreeResult(&sr);
    }
    freeDnaSeq(&chromSeq);
    pslFreeList(&pslList);
    binKeeperFree(&bk);
    hFreeConn(&conn);
}