void doOneChrom(char *database, char *chrom, char *rnaTable, char *expTable, FILE *f)
/* Process one chromosome.  Every expTable row on chrom is loaded as a
 * 12-field bed and indexed by position in a bin-keeper.  Each rnaTable
 * row on chrom is then classified by the number of overlapping expTable
 * items whose strand character matches its own:
 *     miss - none,  uniq - exactly one,  dupe - more than one.
 * One line per rnaTable row is written to f, and the file-level counters
 * hitCount, missCount, uniqCount and dupeCount are updated.
 * The bed records and the bin-keeper built here are not released. */
{
int chromSize = hChromSize(database, chrom);
struct binKeeper *expKeeper = binKeeperNew(0, chromSize);
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr;
char **row;
int rowOffset;

/* First pass: index the expTable items by their chromosome range. */
for (sr = hChromQuery(conn, expTable, chrom, NULL, &rowOffset);
        (row = sqlNextRow(sr)) != NULL; )
    {
    struct bed *expBed = bedLoadN(row + rowOffset, 12);
    binKeeperAdd(expKeeper, expBed->chromStart, expBed->chromEnd, expBed);
    }
sqlFreeResult(&sr);

/* Second pass: count same-strand overlaps for each rnaTable item. */
for (sr = hChromQuery(conn, rnaTable, chrom, NULL, &rowOffset);
        (row = sqlNextRow(sr)) != NULL; )
    {
    struct bed *rnaBed = bedLoadN(row + rowOffset, 12);
    struct binElement *overlapList = binKeeperFind(expKeeper,
            rnaBed->chromStart, rnaBed->chromEnd);
    struct binElement *overlap;
    int sameStrandCount = 0;

    for (overlap = overlapList; overlap != NULL; overlap = overlap->next)
        {
        struct bed *expBed = overlap->val;
        if (expBed->strand[0] != rnaBed->strand[0])
            continue;
        ++sameStrandCount;
        ++hitCount;
        }
    slFreeList(&overlapList);

    switch (sameStrandCount)
        {
        case 0:
            ++missCount;
            fprintf(f, "miss %s:%d-%d %c %s\n", rnaBed->chrom, rnaBed->chromStart, rnaBed->chromEnd, rnaBed->strand[0], rnaBed->name);
            break;
        case 1:
            fprintf(f, "uniq %s:%d-%d %c %s\n", rnaBed->chrom, rnaBed->chromStart, rnaBed->chromEnd, rnaBed->strand[0], rnaBed->name);
            ++uniqCount;
            break;
        default:
            fprintf(f, "dupe %s:%d-%d %c %s\n", rnaBed->chrom, rnaBed->chromStart, rnaBed->chromEnd, rnaBed->strand[0], rnaBed->name);
            ++dupeCount;
            break;
        }
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
}
void getBinKeeper(char *chromName)
/* Put SNPs in binKeeper.  Creates the file-level snps bin-keeper sized to
 * chromName and adds one entry per snpTable row on that chromosome, using
 * the row's chromStart/chromEnd as the range and a private copy of its
 * name as the value. */
{
char query[512];
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr;
char **row;
int chromSize = hChromSize(chromName);

verbose(1, "constructing binKeeper...\n");
snps = binKeeperNew(0, chromSize);
safef(query, sizeof(query), 
      "select chromStart, chromEnd, name from %s where chrom = '%s'", snpTable, chromName);

for (sr = sqlGetResult(conn, query); (row = sqlNextRow(sr)) != NULL; )
    {
    int snpStart = sqlUnsigned(row[0]);
    int snpEnd = sqlUnsigned(row[1]);
    /* The row buffer belongs to the result set, so keep a copy of the id. */
    char *snpName = cloneString(row[2]);
    binKeeperAdd(snps, snpStart, snpEnd, snpName);
    }

sqlFreeResult(&sr);
hFreeConn(&conn);
}
Ejemplo n.º 3
0
struct hash *readChainToBinKeeper(char *sizeFileName, char *fileName)
/* Read the chains in fileName into a hash of bin-keepers keyed by target
 * sequence name.  sizeFileName supplies two-column rows (name, size); one
 * bin-keeper covering 0..size is created per distinct name, and duplicate
 * names are warned about and ignored.  Each chain is stored under its
 * tName at tStart..tEnd; a chain whose tName has no size row is rejected
 * by hashMustFindVal.  Returns the hash. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct lineFile *sf = lineFileOpen(sizeFileName, TRUE);
struct hash *hash = newHash(0);
struct binKeeper *bk;
struct chain *chain;
char *chromRow[2];

while (lineFileRow(sf, chromRow))
    {
    char *name = chromRow[0];
    int size = lineFileNeedNum(sf, chromRow, 1);

    if (hashLookup(hash, name) != NULL)
        warn("Duplicate %s, ignoring all but first\n", name);
    else
        {
        /* Check the size before it is used to build the keeper. */
        assert(size > 1);
        bk = binKeeperNew(0, size);
        hashAdd(hash, name, bk);
        }
    }
/* The size file is fully consumed; release it (it was previously leaked). */
lineFileClose(&sf);

while ((chain = chainRead(lf)) != NULL)
    {
    bk = hashMustFindVal(hash, chain->tName);
    binKeeperAdd(bk, chain->tStart, chain->tEnd, chain);
    }
lineFileClose(&lf);
return hash;
}
struct binKeeper *fbToBinKeeper(struct featureBits *fbList, int chromSize)
/* Make a binKeeper covering 0..chromSize and add every element of fbList
 * to it, keyed by the element's start..end.  The elements themselves are
 * stored as values, not copied. */
{
struct binKeeper *keeper = binKeeperNew(0, chromSize);
struct featureBits *cur = fbList;

while (cur != NULL)
    {
    binKeeperAdd(keeper, cur->start, cur->end, cur);
    cur = cur->next;
    }
return keeper;
}
Ejemplo n.º 5
0
void loadPslsFromFile(char *pslFile, char *chrom, struct sqlConnection *conn)
/** Load the psls from pslFile (instead of the database).
 * The file-level minPslStart/maxPslEnd bounds are widened over every psl
 * in the file, then chromPslBin and agxSeenBin are created over that
 * range.  Psls whose tName equals chrom are added to chromPslBin; all
 * others are freed.  conn is not used here. */
{
struct psl *pslList = pslLoadAll(pslFile);
struct psl *cur, *following;

/* Bounds are taken over the whole file, not only the psls on chrom. */
for (cur = pslList; cur != NULL; cur = cur->next)
    {
    minPslStart = min(cur->tStart, minPslStart);
    maxPslEnd = max(cur->tEnd, maxPslEnd);
    }
chromPslBin = binKeeperNew(minPslStart, maxPslEnd);
agxSeenBin = binKeeperNew(minPslStart, maxPslEnd);

cur = pslList;
while (cur != NULL)
    {
    /* Grab the link first: pslFree() below may release cur. */
    following = cur->next;
    if (!sameString(cur->tName, chrom))
        pslFree(&cur);
    else
        binKeeperAdd(chromPslBin, cur->tStart, cur->tEnd, cur);
    cur = following;
    }
}
Ejemplo n.º 6
0
struct hash *readBed(char *fileName)
/* Read a three-column bed file into a hash keyed by chromosome name.
 * The hash values are chromInfo records, each of which carries a
 * binKeeper (ci->bk) holding that chromosome's ranges.  The file is read
 * twice: once to learn the largest end coordinate per chromosome, and
 * once to add the ranges.  Ranges are stored with a NULL value.  A
 * summary line is printed to stdout. */
{
struct hash *chromHash = newHash(0);
struct chromInfo *infoList = NULL, *info;
struct lineFile *lf;
char *words[3];
int itemCount = 0, chromCount = 0;

/* First pass: find the maximum end seen on each chromosome. */
for (lf = lineFileOpen(fileName, TRUE); lineFileRow(lf, words); ++itemCount)
    {
    char *chromName = words[0];
    int end = lineFileNeedNum(lf, words, 2);

    info = hashFindVal(chromHash, chromName);
    if (info == NULL)
        {
        AllocVar(info);
        hashAddSaveName(chromHash, chromName, info, &info->name);
        slAddHead(&infoList, info);
        ++chromCount;
        }
    if (info->maxEnd < end)
        info->maxEnd = end;
    }
lineFileClose(&lf);

/* Give every chromosome a binKeeper just big enough for its items. */
info = infoList;
while (info != NULL)
    {
    info->bk = binKeeperNew(0, info->maxEnd);
    info = info->next;
    }

/* Second pass: add each range to its chromosome's binKeeper. */
for (lf = lineFileOpen(fileName, TRUE); lineFileRow(lf, words); )
    {
    int start = lineFileNeedNum(lf, words, 1);
    int end = lineFileNeedNum(lf, words, 2);

    info = hashMustFindVal(chromHash, words[0]);
    binKeeperAdd(info->bk, start, end, NULL);
    }
lineFileClose(&lf);

printf("Read %d items in %d target chromosomes from %s\n", 
	itemCount, chromCount, fileName);
return chromHash;
}
struct binKeeper *getChromBins(struct hash *chromHash, char *chrom,
                               char *strand)
/* Get the binKeeper for a chrom and strand, creating it if needed.
 * Keepers are stored in chromHash under the key chrom immediately
 * followed by strand; a new keeper spans 0..511*1024*1024. */
{
char key[64];
struct hashEl *entry;

safef(key, sizeof(key), "%s%s", chrom, strand);
entry = hashLookup(chromHash, key);
if (entry != NULL)
    return entry->val;

entry = hashAdd(chromHash, key, binKeeperNew(0, 511*1024*1024));
return entry->val;
}
Ejemplo n.º 8
0
struct hash *minChromSizeKeeperHash(struct hash *sizeHash)
/* Return a hash full of binKeepers that match the input sizeHash,
 * (which generally is the output of minChromSizeFromBeds).  For every
 * minChromSize value in sizeHash a keeper spanning 0..minSize is created
 * and stored under that record's chrom field. */
{
struct hash *keeperHash = NULL;
struct hashEl *elList = hashElListHash(sizeHash);
struct hashEl *cur = elList;

keeperHash = hashNew(16);
while (cur != NULL)
    {
    struct minChromSize *sizeInfo = cur->val;
    hashAdd(keeperHash, sizeInfo->chrom, binKeeperNew(0, sizeInfo->minSize));
    cur = cur->next;
    }
/* Only the temporary element list is released; sizeHash is untouched. */
hashElFreeList(&elList);
return keeperHash;
}
Ejemplo n.º 9
0
void loadPslsFromDatabase(struct sqlConnection *conn, char *db, char *chrom) 
/** Load all of the desired alignments on chrom from each of the
    numDbTables tables in dbTables into the chromkeeper structure.
    The file-level minPslStart/maxPslEnd bounds are widened as psls are
    read; chromPslBin and agxSeenBin are then created over that range and
    every loaded psl is added to chromPslBin.  db is not used here. */
{
struct psl *pslList = NULL, *psl;
struct sqlResult *sr = NULL;
char **row = NULL;
int rowOffset = 0;
int tableIx;

for (tableIx = 0; tableIx < numDbTables; ++tableIx)
    {
    char *table = dbTables[tableIx];

    sr = hChromQuery(conn, table, chrom, NULL, &rowOffset); 
    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
        psl = pslLoad(row+rowOffset);
        slAddHead(&pslList, psl);
        minPslStart = min(psl->tStart, minPslStart);
        maxPslEnd = max(psl->tEnd, maxPslEnd);
        /* Cheap weighting: alignments from refSeqAli/mrna tables are
           put on the list a second time when weightMrna is set. */
        if (weightMrna && (stringIn("refSeqAli", table) || stringIn("mrna", table)))
            {
            psl = clonePsl(psl);
            slAddHead(&pslList, psl);
            }
        }
    sqlFreeResult(&sr);
    }

chromPslBin = binKeeperNew(minPslStart, maxPslEnd);
agxSeenBin = binKeeperNew(minPslStart, maxPslEnd);
psl = pslList;
while (psl != NULL)
    {
    binKeeperAdd(chromPslBin, psl->tStart, psl->tEnd, psl);
    psl = psl->next;
    }
}
Ejemplo n.º 10
0
struct hash *keepersForChroms(struct sqlConnection *conn)
/* Create hash of binKeepers keyed by chromosome.  One keeper spanning
 * 0..size is made for every (chrom, size) row of the chromInfo table. */
{
struct hash *keeperHash = hashNew(0);
struct sqlResult *sr = sqlGetResult(conn, NOSQLINJ "select chrom,size from chromInfo");
char **row;

for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    int chromSize = sqlUnsigned(row[1]);
    hashAdd(keeperHash, row[0], binKeeperNew(0, chromSize));
    }
sqlFreeResult(&sr);
return keeperHash;
}
Ejemplo n.º 11
0
struct mouseChromCache *newMouseChromCache(char *chrom, int chromSize, 
	char *ratMouseDir)
/* Create a new chromCache for chrom.
 * Opens ratMouseDir/chrom.axt and keeps the open lineFile in mcc->lf,
 * then reads ratMouseDir/chrom.axt.ix into mcc->bk.  Each index row is
 * read as (start, size, offset): the range start..start+size is stored
 * with a heap-allocated long long holding the offset.
 * If the .axt file cannot be opened a warning is issued and, unless
 * noDieMissing is set, noWarnAbort() is called; otherwise the cache is
 * returned with lf == NULL and no bin-keeper (it is NOT a NULL return).
 * File names that do not fit the local buffer are reported rather than
 * overflowing it. */
{
struct mouseChromCache *mcc;
char fileName[512];
struct lineFile *lf;
char *row[3];
int start,end;
int nameLen;
long long *pPos;

/* Open up file with actual alignments.  snprintf bounds the write; a
 * result >= the buffer size means the name was truncated. */
nameLen = snprintf(fileName, sizeof(fileName), "%s/%s.axt", ratMouseDir, chrom);
if (nameLen < 0 || nameLen >= (int)sizeof(fileName))
    {
    warn("File name too long: %s/%s.axt", ratMouseDir, chrom);
    noWarnAbort(); 
    }
lf = lineFileMayOpen(fileName, TRUE);

/* Allocate structure and store basic info in it. */
AllocVar(mcc);
mcc->name = cloneString(chrom);
mcc->size = chromSize;
mcc->lf = lf;
if (lf == NULL)
    {
    warn("%s doesn't exist", fileName);
    if (!noDieMissing)
        noWarnAbort(); 
    return mcc;
    }

/* Read index file into bk. */
nameLen = snprintf(fileName, sizeof(fileName), "%s/%s.axt.ix", ratMouseDir, chrom);
if (nameLen < 0 || nameLen >= (int)sizeof(fileName))
    {
    warn("File name too long: %s/%s.axt.ix", ratMouseDir, chrom);
    noWarnAbort(); 
    }
mcc->bk = binKeeperNew(0, chromSize);
lf = lineFileOpen(fileName, TRUE);
verbose(1, "Reading %s\n", fileName);
while (lineFileRow(lf, row))
    {
    start = lineFileNeedNum(lf, row, 0);
    /* Second column is a length, so the end is start plus that value. */
    end = lineFileNeedNum(lf, row, 1) + start;
    AllocVar(pPos);
    *pPos = atoll(row[2]);
    binKeeperAdd(mcc->bk, start, end, pPos);
    }
lineFileClose(&lf);

/* Return initialized object. */
return mcc;
}
Ejemplo n.º 12
0
struct binKeeper *loadAxtsIntoRange(char *fileName, char *tPrefix, char *qPrefix)
/* Read in an axt file and shove it into a bin-keeper spanning
 * 0..maxChromSize, keyed by each alignment's tStart..tEnd.  The number of
 * alignments loaded is reported through uglyf.  tPrefix and qPrefix are
 * not used here. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct binKeeper *keeper = binKeeperNew(0, maxChromSize);
struct axt *axt;
int loaded;

for (loaded = 0; (axt = axtRead(lf)) != NULL; ++loaded)
    binKeeperAdd(keeper, axt->tStart, axt->tEnd, axt);

uglyf("LOaded %d from %s\n", loaded, fileName);
lineFileClose(&lf);
return keeper;
}
void chromKeeperInit(char *db)
/* Initialize the chromKeeper to a given database (hg15,mm2, etc).
 * Fills the file-level chromNames/chromRanges arrays with one entry per
 * chromosome of db: a copy of its name and a binKeeper spanning its
 * size.  Asserts that the arrays have not been set up already. */
{
struct slName *names = hAllChromNames(db);
struct slName *name;
int ix;

chromCount = slCount(names);
assert(chromNames == NULL && chromRanges == NULL);
AllocArray(chromNames, chromCount);
AllocArray(chromRanges, chromCount);

for (name = names, ix = 0; name != NULL; name = name->next, ++ix)
    {
    int size = hChromSize(db, name->name);
    chromRanges[ix] = binKeeperNew(0,size);
    chromNames[ix] = cloneString(name->name);
    }
slFreeList(&names);
}
void chromKeeperInitChroms(struct slName *nameList, int maxChromSize)
/* Initialize a chrom keeper with a list of names and a size that
   will be used for each one.  chromCount is always set to the length of
   nameList; when that is zero nothing else is done.  Otherwise the
   file-level chromNames/chromRanges arrays are allocated (they must not
   exist yet) and filled with a copy of each name and a binKeeper
   spanning 0..maxChromSize. */
{
struct slName *name;
int ix;

chromCount = slCount(nameList);
if (chromCount != 0)
    {
    assert(chromNames == NULL && chromRanges == NULL);
    AllocArray(chromNames, chromCount);
    AllocArray(chromRanges, chromCount);
    for (name = nameList, ix = 0; name != NULL; name = name->next, ++ix)
        {
        chromRanges[ix] = binKeeperNew(0,maxChromSize);
        chromNames[ix] = cloneString(name->name);
        }
    }
}
Ejemplo n.º 15
0
struct hash *readBed(char *fileName)
/* Read bed and return it as a hash keyed by chromName
 * with binKeeper values.  Rows have 5 columns when bScore is set (the
 * fifth is the score) and 3 otherwise.  Each row becomes a bed5 stored in
 * its chromosome's keeper.  Rows with start > end abort; rows with
 * start == end abort unless allowStartEqualEnd is set, in which case they
 * are indexed one base wider on each side (clamped at 0). */
{
char *words[5];
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *chromHash = newHash(0);
int wordCount = 3;

if (bScore)
    wordCount = 5;

while (lineFileNextRow(lf, words, wordCount))
    {
    struct hashEl *hel = hashLookup(chromHash, words[0]);
    struct binKeeper *keeper;
    struct bed5 *item;

    /* First time this chromosome is seen: give it a keeper. */
    if (hel == NULL)
        hel = hashAdd(chromHash, words[0], binKeeperNew(0, 1024*1024*1024));
    keeper = hel->val;

    AllocVar(item);
    item->chrom = hel->name;
    item->start = lineFileNeedNum(lf, words, 1);
    item->end = lineFileNeedNum(lf, words, 2);
    if (bScore)
        item->score = lineFileNeedNum(lf, words, 4);

    if (item->start > item->end)
        errAbort("start after end line %d of %s", lf->lineIx, lf->fileName);

    if (item->start != item->end)
        binKeeperAdd(keeper, item->start, item->end, item);
    else if (allowStartEqualEnd)
        /* Only the index range is widened; the bed keeps its own coords. */
        binKeeperAdd(keeper, max(0, item->start-1), item->end+1, item);
    else
        lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)");
    }
lineFileClose(&lf);
return chromHash;
}
Ejemplo n.º 16
0
struct hash *netToBkHash(char *netFile)
/* Read net file into a hash full of binKeepers keyed by chromosome.
 * The binKeepers are full of nets: each keeper spans 0..net->size and
 * holds the entries of that net's fillList, indexed by
 * tStart..tStart+tSize.  Aborts if a net name occurs more than once. */
{
struct hash *netHash = hashNew(0);
struct lineFile *lf = lineFileOpen(netFile, TRUE);
struct chainNet *net = chainNetRead(lf);

while (net != NULL)
    {
    struct binKeeper *keeper;
    struct cnFill *fill;

    if (hashLookup(netHash, net->name))
        errAbort("%s has multiple %s records", netFile, net->name);
    keeper = binKeeperNew(0, net->size);
    hashAdd(netHash, net->name, keeper);

    fill = net->fillList;
    while (fill != NULL)
        {
        binKeeperAdd(keeper, fill->tStart, fill->tStart+fill->tSize, fill);
        fill = fill->next;
        }
    net = net->next;
    }
lineFileClose(&lf);
return netHash;
}
Ejemplo n.º 17
0
struct binKeeper *readRepeats2(char *chrom, char *rmskFileName, struct hash *tSizeHash)
/* Read all repeats from rmskFileName into a binKeeper for fast query.
 * The keeper spans 0..size, where size is the integer stored for chrom
 * in tSizeHash; each record is indexed by genoStart..genoEnd.  Every
 * record in the file is added - no filtering by chrom is done here. */
{
    boolean rmskRet;
    struct lineFile *rmskF = NULL;
    struct rmskOut2 *rmsk;
    struct binKeeper *bk; 
    int size;

    size = hashIntVal(tSizeHash, chrom);
    /* Check the size before it is used to build the keeper. */
    assert(size > 1);
    bk = binKeeperNew(0, size);
    rmskOut2OpenVerify(rmskFileName ,&rmskF , &rmskRet);
    while ((rmsk = rmskOut2ReadNext(rmskF)) != NULL)
        {
        binKeeperAdd(bk, rmsk->genoStart, rmsk->genoEnd, rmsk);
        }
    lineFileClose(&rmskF);
    return bk;
}
Ejemplo n.º 18
0
struct hash *bedsIntoHashOfKeepers(struct bed *bedList)
/* Return a hash full of binKeepers, keyed by chromosome (or contig)
 * that contains the bedList.  Keepers are created on first use, sized
 * from the minChromSize record that chromMinSizeHash() computed for that
 * chromosome.  The beds are stored by reference, not copied. */
{
struct hash *sizeHash = chromMinSizeHash(bedList);
struct hash *keeperHash = hashNew(16);
struct bed *cur = bedList;

while (cur != NULL)
    {
    struct binKeeper *keeper = hashFindVal(keeperHash, cur->chrom);
    if (keeper == NULL)
        {
        struct minChromSize *sizeInfo = hashMustFindVal(sizeHash, cur->chrom);
        keeper = binKeeperNew(0, sizeInfo->minSize);
        hashAdd(keeperHash, sizeInfo->name, keeper);
        }
    binKeeperAdd(keeper, cur->chromStart, cur->chromEnd, cur);
    cur = cur->next;
    }
/* The size hash was only needed to dimension the keepers. */
hashFree(&sizeHash);
return keeperHash;
}
Ejemplo n.º 19
0
struct hash *txgIntoKeeperHash(struct txGraph *txgList)
/* Create a hash full of bin keepers (one for each chromosome or contig).
 * The binKeepers are full of txGraphs, indexed by tStart..tEnd and keyed
 * in the hash by tName.  Keepers are created on first use, sized from
 * the record txgChromMinSizeHash() computed for that sequence. */
{
struct hash *sizeHash = txgChromMinSizeHash(txgList);
struct hash *bkHash = hashNew(16);
struct txGraph *graph = txgList;

while (graph != NULL)
    {
    struct binKeeper *keeper = hashFindVal(bkHash, graph->tName);
    if (keeper == NULL)
        {
        struct minChromSize *sizeInfo = hashMustFindVal(sizeHash, graph->tName);
        verbose(3, "New binKeeper for %s\n", graph->tName);
        keeper = binKeeperNew(0, sizeInfo->minSize);
        hashAdd(bkHash, graph->tName, keeper);
        }
    binKeeperAdd(keeper, graph->tStart, graph->tEnd, graph);
    graph = graph->next;
    }
/* The size hash was only needed to dimension the keepers. */
hashFree(&sizeHash);
return bkHash;
}
Ejemplo n.º 20
0
struct hash *readLiftOverMapChainHash(char *fileName)
/* taken from kent/src/hg/lib/liftOver.c */
/* Read map file into hashes.  The returned hash is keyed by chain target
 * name; each value is a liftOverChromMap whose binKeeper spans
 * 0..tSize of the first chain seen for that target and holds every chain
 * on it, indexed by tStart..tEnd. */
{
    struct hash *chainHash = hashNew(10);
    struct lineFile *lf = lineFileOpen(fileName, TRUE);

    for (;;)
    {
        struct chain *chain = chainRead(lf);
        struct liftOverChromMap *chromMap;

        if (chain == NULL)
            break;

        chromMap = hashFindVal(chainHash, chain->tName);
        if (chromMap == NULL)
        {
            /* First chain on this target: create its map entry. */
            AllocVar(chromMap);
            chromMap->bk = binKeeperNew(0, chain->tSize);
            hashAddSaveName(chainHash, chain->tName, chromMap, &chromMap->name);
        }
        binKeeperAdd(chromMap->bk, chain->tStart, chain->tEnd, chain);
    }
    lineFileClose(&lf);
    return chainHash;
}
Ejemplo n.º 21
0
struct hash *readRepeatsAll2(char *sizeFileName, char *rmskDir)
/* Read all repeats for all chromosomes, getting sizes from sizeFileName.
 * For every distinct (name, size) row of sizeFileName a binKeeper
 * spanning 0..size is filled from rmskDir/name.fa.out, each record
 * indexed by genoStart..genoEnd.  Duplicate names are warned about and
 * ignored.  Returns a hash of binKeepers keyed by name for fast query. */
{
boolean rmskRet;
struct binKeeper *bk; 
struct lineFile *rmskF = NULL;
struct rmskOut2 *rmsk;
struct lineFile *lf = lineFileOpen(sizeFileName, TRUE);
struct hash *hash = newHash(0);
char *row[2];
char rmskFileName[256];

while (lineFileRow(lf, row))
    {
    char *name = row[0];
    int size = lineFileNeedNum(lf, row, 1);

    if (hashLookup(hash, name) != NULL)
        warn("Duplicate %s, ignoring all but first\n", name);
    else
        {
        /* Check the size before it is used to build the keeper. */
        assert(size > 1);
        bk = binKeeperNew(0, size);
        safef(rmskFileName, sizeof(rmskFileName), "%s/%s.fa.out",rmskDir,name);
        rmskOut2OpenVerify(rmskFileName ,&rmskF , &rmskRet);
        while ((rmsk = rmskOut2ReadNext(rmskF)) != NULL)
            {
            binKeeperAdd(bk, rmsk->genoStart, rmsk->genoEnd, rmsk);
            }
        lineFileClose(&rmskF);
        hashAdd(hash, name, bk);
        }
    }
lineFileClose(&lf);
return hash;
}
void sortGenes(struct sqlConnection *conn)
/* Put up sort gene page.
 * Finds the regions of the first visible genome graph that are over
 * threshold, collects the names of all geneTable items overlapping those
 * regions, writes them to a trash "key" file and installs that file as a
 * Gene Sorter filter through the cart.  When a "<binFileName>.cgm"
 * marker file exists, markers at or above threshold that fall inside the
 * selected genes are also written to a trash file that is installed as a
 * Gene Sorter custom column.  Finishes by printing a summary and a
 * button that goes to hgNear. */
{
cartWebStart(cart, database, "Finding Candidate Genes for Gene Sorter");
if (!hgNearOk(database))
    errAbort("Sorry, gene sorter not available for this database.");

/* Get list of regions. */
struct genoGraph *gg = ggFirstVisible();
double threshold = getThreshold();
struct bed3 *bed, *bedList = regionsOverThreshold(gg);

/* Figure out what table and column are the sorter's main gene set. */
struct hash *genomeRa = hgReadRa(genome, database, "hgNearData", 
	"genome.ra", NULL);
char *geneTable = hashMustFindVal(genomeRa, "geneTable");
char *idColumn = hashMustFindVal(genomeRa, "idColumn");

/* if marker labels were present when the file was uploaded, they are saved here */
char cgmName[256];
safef(cgmName, sizeof(cgmName), "%s.cgm", gg->binFileName);
struct lineFile *m = lineFileMayOpen(cgmName, TRUE);
/* The marker loop below is a do/while that examines the current row
 * before reading the next one, so start it off with a dummy row. */
char *cgmRow[4];
cgmRow[0] = "";    /* dummy row */
cgmRow[1] = "";
cgmRow[2] = "0";
cgmRow[3] = "0";

FILE *g = NULL;
int markerCount = 0;
struct tempName snpTn;   /* only filled in when the marker file exists */

if (m)
    {
    /* Create custom column output file. */
    trashDirFile(&snpTn, "hgg", "marker", ".mrk");  
    g = mustOpen(snpTn.forCgi, "w");
    fprintf(g, 
	"column name=\"%s Markers\" shortLabel=\"%s Markers over threshold\" longLabel=\"%s Markers in regions over threshold\" " 
	"visibility=on priority=99 "
        "\n"
        , gg->shortLabel
        , gg->shortLabel
        , gg->shortLabel
	);
    }

/*** Build up hash of all transcriptHash that are in region. */
struct hash *transcriptHash = hashNew(16);

/* This loop handles one chromosome at a time.  It depends on
 * the bedList being sorted by chromosome. */
for (bed = bedList; bed != NULL; )
    {

    /* Make binKeeper and stuff in all regions in this chromosome into it. */
    char *chrom = bed->chrom;
    int chromSize = hChromSize(database, chrom);
    struct binKeeper *bk = binKeeperNew(0, chromSize);
    while (bed != NULL && sameString(chrom, bed->chrom))
	{
	binKeeperAdd(bk, bed->chromStart, bed->chromEnd, bed);
	bed = bed->next;
	}

    /* Second keeper, holding the selected genes, is only needed for
     * matching markers against them. */
    struct binKeeper *bkGenes = NULL;
    if (m)
       bkGenes = binKeeperNew(0, chromSize);

    /* Query database to find out bounds of all genes on this chromosome
     * and if they overlap any of the regions then put them in the hash. */
    char query[512];
    safef(query, sizeof(query), 
    	"select name,txStart,txEnd from %s where chrom='%s'", geneTable, chrom);
    struct sqlResult *sr = sqlGetResult(conn, query);
    char **row;
    while ((row = sqlNextRow(sr)) != NULL)
        {
	char *name = row[0];
	int start = sqlUnsigned(row[1]);
	int end = sqlUnsigned(row[2]);
	if (binKeeperAnyOverlap(bk, start, end))
	    {
	    hashStore(transcriptHash, name);
	    if (m)
		binKeeperAdd(bkGenes, start, end, cloneString(name));
	    }
	}
    sqlFreeResult(&sr);

    if (m)
	{
	/* Read cgm file if it exists, looking at all markers on this chromosome
	 * and if they overlap any of the regions and genes then output them.
	 * The row that ends the loop (first marker on a later chromosome)
	 * stays in cgmRow and is the first one examined next time round. */
	do 
	    {
	    // marker, chrom, chromStart, val
	    char *marker = cgmRow[0];
	    char *chr = cgmRow[1];
	    int start = sqlUnsigned(cgmRow[2]);
	    int end = start+1;
	    double val = sqlDouble(cgmRow[3]);
            int cmp = strcmp(chr,chrom);
            if (cmp > 0)
                break;
            if (cmp == 0)
		{
		if (val >= threshold)
		    {
		    struct binElement *el, *bkList = binKeeperFind(bkGenes, start, end);
		    for (el = bkList; el; el=el->next)
			{
			/* output to custom column trash file */
			fprintf(g, "%s %s\n", (char *)el->val, marker);
			}
		    if (bkList)
			{
			++markerCount;
			slFreeList(&bkList);
			}
		    }
		}
	    }
	while (lineFileRow(m, cgmRow));
	}

    /* Clean up for this chromosome. */
    binKeeperFree(&bk);

    if (m)
	{
	/* For speed, we do not free up the values (cloned the kg names earlier) */
	binKeeperFree(&bkGenes);  
	}

    }

/* Get list of all transcripts in regions. */
struct hashEl *el, *list = hashElListHash(transcriptHash);

/* Create file with all matching gene IDs. */
struct tempName keyTn;
trashDirFile(&keyTn, "hgg", "key", ".key");
FILE *f = mustOpen(keyTn.forCgi, "w");
for (el = list; el != NULL; el = el->next)
    fprintf(f, "%s\n", el->name);
carefulClose(&f);

/* Print out some info. */
hPrintf("Thresholding <i>%s</i> at %g. ", gg->shortLabel, threshold);
hPrintf("There are %d regions covering %lld bases.<BR>\n",
    slCount(bedList), bedTotalSize((struct bed*)bedList) );
hPrintf("Installed a Gene Sorter filter that selects only genes in these regions.<BR>\n");
if (m)
    {
    hPrintf("There are %d markers in the regions over threshold that overlap knownGenes.<BR>\n", markerCount);
    hPrintf("Installed a Gene Sorter custom column called \"%s Markers\" with these markers.<BR>\n", gg->shortLabel);
    }

/* close custom column output file */
if (m)
    {
    lineFileClose(&m);
    carefulClose(&g);
    }

/* Stuff cart variable with name of file. */
char keyCartName[256];
safef(keyCartName, sizeof(keyCartName), "%s%s.keyFile",
	advFilterPrefix, idColumn);
cartSetString(cart, keyCartName, keyTn.forCgi);

/* Only install the custom column file when one was written: snpTn is
 * uninitialized otherwise.  g is set exactly when the marker file was
 * opened (m itself has been closed and cleared by now).
 * NOTE(review): a value left in the cart by an earlier run is not cleared
 * here - confirm whether it should be. */
if (g != NULL || markerCount > 0)
    cartSetString(cart, customFileVarName, snpTn.forCgi);

char snpVisCartNameTemp[256];
char *snpVisCartName = NULL;
safef(snpVisCartNameTemp, sizeof(snpVisCartNameTemp), "%s%s Markers.vis",
	colConfigPrefix, gg->shortLabel);
snpVisCartName = replaceChars(snpVisCartNameTemp, " ", "_");
cartSetString(cart, snpVisCartName, "1");
freeMem(snpVisCartName);

hPrintf("<FORM ACTION=\"../cgi-bin/hgNear\" METHOD=GET>\n");
cartSaveSession(cart);
hPrintf("<CENTER>");
cgiMakeButton("submit", "go to gene sorter");
hPrintf("</CENTER>");
hPrintf("</FORM>");

cartWebEnd();
}
Ejemplo n.º 23
0
void createAltSplices(char *db, char *outFile,  boolean memTest)
/* Top level routine, gets genePredictions and runs through them to 
   build altSplice graphs.  Loci come from the "beds" option if given,
   otherwise from the "genePreds" option; all of them must be on one
   chromosome.  With the "localMem" option the chrom keeper and tissue
   caches are set up first; otherwise agxSeenBin is sized from the loci
   (padded by 10000 on each side).  agFromGp() is then run on each locus
   with outFile as its output.
   NOTE(review): count is never incremented, so the "count < 5" bound
   never triggers, and with memTest set gp is never advanced - confirm
   whether memTest is meant to loop forever on the first locus. */
{
struct sqlConnection *conn = hAllocConn(db);
struct genePred *gpList = NULL, *gp;
char *bedFile = optionVal("beds", NULL);
char *gpFile = optionVal("genePreds", NULL);
FILE *out;
int count = 0;

/* Figure out where to get coordinates from. */
if (bedFile == NULL && gpFile == NULL)
    {
    warn("Must specify target loci as either a bed file or a genePred file");
    usage();
    }
else if (bedFile != NULL)
    gpList = convertBedsToGps(bedFile);
else
    gpList = genePredLoadAll(gpFile);

if (!gpAllSameChrom(gpList))
    errAbort("Multiple chromosomes in bed or genePred file.");

/* Sanity check to make sure we got some loci to work with. */
if (gpList == NULL)
    errAbort("No gene boundaries were found.");
slSort(&gpList, genePredCmp);
setupTables(gpList->chrom);

if (!optionExists("localMem"))
    {
    /* Have to set up agxSeen binKeeper based on genePreds. */
    int maxPos = 0;
    int minPos = BIGNUM;
    for (gp = gpList; gp != NULL; gp = gp->next)
        {
        maxPos = max(maxPos, gp->txEnd);
        minPos = min(minPos, gp->txStart);
        }
    agxSeenBin = binKeeperNew(max(0, minPos-10000), min(BIGNUM,maxPos+10000));
    }
else
    {
    /* If local memory get things going here. */
    warn("Using local memory. Setting up caches...");
    useChromKeeper = TRUE;
    setupChromKeeper(conn, optionVal("db", NULL), gpList->chrom);
    if (!optionExists("skipTissues"))
        {
        if (!optionExists("tissueLibFile"))
            setupTissueLibraryCache(conn);
        else
            readTissueLibraryIntoCache(optionVal("tissueLibFile", NULL));
        }
    warn("Done setting up local caches.");
    }

dotForUserInit(max(slCount(gpList)/10, 1));
out = mustOpen(outFile, "w");
gp = gpList;
while (gp != NULL && count < 5)
    {
    dotForUser();
    fflush(stderr);
    /* The returned graph is not kept here; memory is held in the
     * binKeeper. */
    agFromGp(db, gp, conn, 5, out);
    if (memTest != TRUE) 
        gp = gp->next;
    }
genePredFreeList(&gpList);
hFreeConn(&conn);
}
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack,
              struct hash *otherHash, struct stats *stats)
/* Process one chromosome.
 * Loads the three-field beds on chrom from bedTrack (read as a file when
 * the name ends in ".bed", otherwise as a database table), loads every
 * refAliTrack psl on chrom into a bin-keeper, and fetches the chromosome
 * sequence.  Each psl overlapping a bed is trimmed to the bed's range
 * and folded into stats; stats->bedCount and stats->bedBaseCount are
 * updated per bed.
 * NOTE(review): bedList is not released before returning. */
{
    struct bed *bedList = NULL, *bed;
    struct sqlConnection *conn = hAllocConn(database);
    struct sqlResult *sr;
    char **row;
    int rowOffset;
    int chromSize = hChromSize(database, chrom);
    struct binKeeper *bk = binKeeperNew(0, chromSize);
    struct psl *pslList = NULL;
    struct dnaSeq *chromSeq = NULL;

    if (endsWith(bedTrack, ".bed"))
    {
        struct lineFile *lf = lineFileOpen(bedTrack, TRUE);
        /* Distinct name so the database row pointer is not shadowed. */
        char *bedRow[3];
        while (lineFileRow(lf, bedRow))
        {
            if (sameString(chrom, bedRow[0]))
            {
                bed = bedLoad3(bedRow);
                slAddHead(&bedList, bed);
            }
        }
        lineFileClose(&lf);
    }
    else
    {
        sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset);
        while ((row = sqlNextRow(sr)) != NULL)
        {
            bed = bedLoad3(row+rowOffset);
            slAddHead(&bedList, bed);
        }
        sqlFreeResult(&sr);
    }
    /* slAddHead built the list back to front; restore input order. */
    slReverse(&bedList);
    uglyf("Loaded beds\n");

    sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        struct psl *psl = pslLoad(row + rowOffset);
        slAddHead(&pslList, psl);
        binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
    }
    sqlFreeResult(&sr);
    uglyf("Loaded psls\n");

    chromSeq = hLoadChrom(database, chrom);
    /* Fetch entire chromosome into memory. */
    uglyf("Loaded human seq\n");

    for (bed = bedList; bed != NULL; bed = bed->next)
    {
        struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd);
        for (el = list; el != NULL; el = el->next)
        {
            struct psl *fullPsl = el->val;
            struct psl *psl = pslTrimToTargetRange(fullPsl,
                                                   bed->chromStart, bed->chromEnd);
            if (psl != NULL)
            {
                foldPslIntoStats(psl, chromSeq, otherHash, stats);
                pslFree(&psl);
            }
        }
        slFreeList(&list);
        stats->bedCount += 1;
        stats->bedBaseCount += bed->chromEnd - bed->chromStart;
        /* The query result was already released above; nothing to free
         * per bed. */
    }
    freeDnaSeq(&chromSeq);
    pslFreeList(&pslList);
    binKeeperFree(&bk);
    hFreeConn(&conn);
}
void oneChromInput(char *database, char *chrom, int chromSize, 	
	char *rangeTrack, char *expTrack, 
	struct hash *refLinkHash, struct hash *erHash, FILE *f)
/* Read in info for one chromosome.
 * Loads rangeTrack, expTrack and refGene items on chrom into three
 * bin-keepers, builds a rangeInfo for every range, marks as split every
 * range hit by an expression item that overlaps more than one range,
 * and calls outputAveraged() for each unsplit range that overlaps at
 * least one expression item.  Everything loaded here is released before
 * returning. */
{
struct binKeeper *rangeBk = binKeeperNew(0, chromSize);
struct binKeeper *expBk = binKeeperNew(0, chromSize);
struct binKeeper *knownBk = binKeeperNew(0, chromSize);
struct bed *rangeList = NULL, *range;
struct bed *expList = NULL;
struct bed *bed, *nextBed;
struct genePred *knownList = NULL;
struct rangeInfo *riList = NULL, *ri;
struct hash *riHash = hashNew(0); /* rangeInfo values. */
struct binElement *rangeBeList = NULL, *rangeBe, *beList = NULL, *be;

/* Load up data from database. */
rangeList = loadBed(database, chrom, rangeTrack, 12, rangeBk);
expList = loadBed(database, chrom, expTrack, 15, expBk);
knownList = loadGenePred(database, chrom, "refGene", knownBk);

/* Build range info basics. */
rangeBeList = binKeeperFindAll(rangeBk);
for (rangeBe = rangeBeList; rangeBe != NULL; rangeBe = rangeBe->next)
    {
    range = rangeBe->val;
    AllocVar(ri);
    slAddHead(&riList, ri);
    hashAddSaveName(riHash, range->name, ri, &ri->id);
    ri->range = range;
    ri->commonName = findCommonName(range, knownBk, refLinkHash);
    }
slReverse(&riList);

/* Mark split ones. */
beList = binKeeperFindAll(expBk);
for (be = beList; be != NULL; be = be->next)
    {
    struct bed *exp = be->val;
    struct binElement *subList = binKeeperFind(rangeBk, 
    	exp->chromStart, exp->chromEnd);
    if (slCount(subList) > 1)
        {
	struct binElement *sub;
	for (sub = subList; sub != NULL; sub = sub->next)
	    {
	    struct bed *splitRange = sub->val;
	    struct rangeInfo *splitRi = hashMustFindVal(riHash, splitRange->name);
	    splitRi->isSplit = TRUE;
	    }
	}
    slFreeList(&subList);
    }

/* Output the nice ones: not split and having some expression info.
 * The hits get their own list variable so that beList (all expression
 * items, found above) is still intact for the cleanup below. */
for (ri = riList; ri != NULL; ri = ri->next)
    {
    if (!ri->isSplit)
        {
	struct binElement *hitList;
	range = ri->range;
	hitList = binKeeperFind(expBk, range->chromStart, range->chromEnd);
	if (hitList != NULL)
	    outputAveraged(f, ri, erHash, hitList);
	slFreeList(&hitList);
	}
    }

/* Clean up time! */
freeHash(&riHash);
genePredFreeList(&knownList);
/* bedFree() releases one record, so walk each list to free all of it. */
for (bed = rangeList; bed != NULL; bed = nextBed)
    {
    nextBed = bed->next;
    bedFree(&bed);
    }
rangeList = NULL;
for (bed = expList; bed != NULL; bed = nextBed)
    {
    nextBed = bed->next;
    bedFree(&bed);
    }
expList = NULL;
slFreeList(&rangeBeList);
slFreeList(&beList);
slFreeList(&riList);
binKeeperFree(&rangeBk);
binKeeperFree(&expBk);
binKeeperFree(&knownBk);
}