Пример #1
0
void liftAcross(char *liftAcross, char *srcFile, char *dstOut)
/* liftAcross - convert one coordinate system to another, no overlapping items.
 *   liftAcross - passed to readLift() to build the lift hash
 *   srcFile    - passed to genePredExtLoadAll() to load the input items
 *   dstOut     - file opened for writing; receives genePredTabOut() output
 * Items whose chrom is a key in the lift hash go through liftGenePred() and
 * every item it returns is written; all other items are written unchanged.
 * When the bedOut flag is set, bedRegionOutput() is also run on the lift
 * hash before any genePred is processed. */
{
struct hash *lftHash = readLift(liftAcross);
struct genePred *gpList = genePredExtLoadAll(srcFile);
struct genePred *gp = NULL;
FILE *out = mustOpen(dstOut, "w");

if (bedOut)
    bedRegionOutput(lftHash);

int genePredItemCount = 0;
for (gp = gpList; gp != NULL; gp = gp->next)
    {
    struct liftSpec *lsFound = hashFindVal(lftHash, gp->chrom);
    if (lsFound)
        {
        /* liftGenePred() hands back a list that this function owns, so it
         * is written out item by item and then freed. */
        struct genePred *gpLifted = liftGenePred(gp, lsFound);
        struct genePred *gpl;
        for (gpl = gpLifted; gpl != NULL; gpl = gpl->next)
            genePredTabOut(gpl, out);
        genePredFreeList(&gpLifted);
        }
    else
        {
        /* No lift spec for this chrom: pass the item through untouched. */
        genePredTabOut(gp, out);
        }
    ++genePredItemCount;
    }
/* Close the output here instead of leaving the stream open until exit. */
carefulClose(&out);
/* lftHash and gpList are left allocated to disappear at exit */
verbose(2,"#\tgene pred item count: %d\n", genePredItemCount);
}
Пример #2
0
int palOutPredsInBeds(struct sqlConnection *conn, struct cart *cart,
    struct bed *beds, char *table )
/* For each bed in the list, query table for genePreds whose name and chrom
 * equal the bed's and whose txStart/txEnd satisfy the where clause built
 * below.  All hits are gathered in the order they were read and handed to
 * palOutPredList().  Returns what palOutPredList() returns, or 0 when no
 * genePred matched. */
{
struct genePred *predList = NULL;
struct bed *bed;

for (bed = beds; bed != NULL; bed = bed->next)
    {
    char where[10 * 1024];
    struct genePredReader *reader;
    struct genePred *pred;

    sqlSafefFrag(where, sizeof where,
        "name = '%s' and chrom='%s' and txEnd > %d and txStart <= %d",
        bed->name, bed->chrom, bed->chromStart, bed->chromEnd);

    reader = genePredReaderQuery( conn, table, where);
    for (;;)
        {
        pred = genePredReaderNext(reader);
        if (pred == NULL)
            break;
        /* Built head-first; reversed once after all beds are processed. */
        slAddHead(&predList, pred);
        }
    genePredReaderFree(&reader);
    }

/* Nothing matched any bed: nothing to output. */
if (predList == NULL)
    return 0;

slReverse(&predList);
int outCount = palOutPredList( conn, cart, predList);
genePredFreeList(&predList);
return outCount;
}
Пример #3
0
static void getGeneAnns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* Load the genePreds returned by querying "refGene", sort them with
 * genePredNameCmp, and pass each one to processGenePred() together with
 * refSeqVerInfoTbl and a handle on outFile opened for writing. */
{
struct genePred *geneList = genePredReaderLoadQuery(conn, "refGene", NULL);
struct genePred *gene;
FILE *outFh;

slSort(&geneList, genePredNameCmp);
outFh = mustOpen(outFile, "w");

gene = geneList;
while (gene != NULL)
    {
    processGenePred(outFh, refSeqVerInfoTbl, gene);
    gene = gene->next;
    }

carefulClose(&outFh);
genePredFreeList(&geneList);
}
Пример #4
0
void hgLoadGenePred(char *db, char *table, int numGenePreds, char **genePredFiles)
/* hgLoadGenePred - Load up a mySQL database genePred table.
 *   db            - database to connect to
 *   table         - table name, also used to name the tab file
 *   numGenePreds  - count handed to loadGenes() along with genePredFiles
 *   genePredFiles - input files handed to loadGenes()
 * The tab file is created in the current directory and removed again once
 * it has been loaded. */
{
char *tabDir = ".";
struct genePred *geneList = loadGenes(numGenePreds, genePredFiles);
struct sqlConnection *dbConn = sqlConnect(db);
FILE *tabFile = hgCreateTabFile(tabDir, table);

/* Write the tab file; the in-memory genes are not needed after that. */
mkTabFile(db, geneList, tabFile);
genePredFreeList(&geneList);

/* Set up the table and load the tab file into it. */
setupTable(db, dbConn, table);
hgLoadTabFile(dbConn, tabDir, table, &tabFile);

sqlDisconnect(&dbConn);
hgRemoveTabFile(tabDir, table);
}
Пример #5
0
static void createCcdsGene(struct sqlConnection *conn, char *ccdsGeneFile,
                           struct genomeInfo *genome, struct hash* ignoreTbl,
                           struct hash *gotCcds)
/* Write the ccdsGene tab file.  Locations come from loadLocations() and are
 * turned into genePreds by buildCcdsGene(); each genePred is written with
 * genePredTabOut().  When loadDb is set, every row is prefixed with a bin
 * column computed by binFromRange() over txStart..txEnd. */
{
struct ccdsLocationsJoin *locs = loadLocations(conn, genome, ignoreTbl, gotCcds);
struct genePred *geneList = buildCcdsGene(&locs);
struct genePred *gene = geneList;
FILE *outFh = mustOpen(ccdsGeneFile, "w");

while (gene != NULL)
    {
    if (loadDb)
        {
        /* Leading bin column, only wanted when the file is loaded into a database. */
        fprintf(outFh, "%d\t", binFromRange(gene->txStart, gene->txEnd));
        }
    genePredTabOut(gene, outFh);
    gene = gene->next;
    }

carefulClose(&outFh);
genePredFreeList(&geneList);
}
Пример #6
0
void createAltSplices(char *db, char *outFile,  boolean memTest)
/* Top level routine, gets genePredictions and runs through them to
 * build altSplice graphs.
 *   db      - database used for the connection and passed to agFromGp()
 *   outFile - file opened for writing and passed to agFromGp()
 *   memTest - when TRUE, agFromGp() is run repeatedly on the first
 *             genePred only (5 times) instead of walking the whole list
 * Target loci come from the "beds" option if present, otherwise from the
 * "genePreds" option.  Aborts when neither yields any loci or when the loci
 * span more than one chromosome. */
{
struct genePred *gp = NULL, *gpList = NULL;
FILE *out = NULL;
struct sqlConnection *conn = hAllocConn(db);
char *gpFile = NULL;
char *bedFile = NULL;
int count = 0;

/* Figure out where to get coordinates from. */
bedFile = optionVal("beds", NULL);
gpFile = optionVal("genePreds", NULL);
if(bedFile != NULL)
    gpList = convertBedsToGps(bedFile);
else if(gpFile != NULL)
    gpList = genePredLoadAll(gpFile);
else
    {
    warn("Must specify target loci as either a bed file or a genePred file");
    usage();   /* NOTE(review): presumably does not return -- confirm */
    }

/* Sanity check to make sure we got some loci to work with.  This has to
 * come before anything that looks at the list contents. */
if(gpList == NULL)
    errAbort("No gene boundaries were found.");

if (!gpAllSameChrom(gpList))
    errAbort("Multiple chromosomes in bed or genePred file.");

slSort(&gpList, genePredCmp);
setupTables(gpList->chrom);

/* If local memory get things going here. */
if(optionExists("localMem"))
    {
    warn("Using local memory. Setting up caches...");
    useChromKeeper = TRUE;
    /* NOTE(review): uses the "db" option rather than the db parameter --
     * confirm that this is intended. */
    setupChromKeeper(conn, optionVal("db", NULL), gpList->chrom);
    if(!optionExists("skipTissues"))
        {
        if(optionExists("tissueLibFile"))
            readTissueLibraryIntoCache(optionVal("tissueLibFile", NULL));
        else
            setupTissueLibraryCache(conn);
        }
    warn("Done setting up local caches.");
    }
else /* Have to set up agxSeen binKeeper based on genePreds. */
    {
    int maxPos = 0;
    int minPos = BIGNUM;
    for(gp = gpList; gp != NULL; gp = gp->next)
        {
        maxPos = max(maxPos, gp->txEnd);
        minPos = min(minPos, gp->txStart);
        }
    /* Pad the covered span by 10000 on each side, clamped to [0, BIGNUM]. */
    agxSeenBin = binKeeperNew(max(0, minPos-10000), min(BIGNUM,maxPos+10000));
    }

dotForUserInit(max(slCount(gpList)/10, 1));
out = mustOpen(outFile, "w");
for(gp = gpList; gp != NULL && count < 5; )
    {
    dotForUser();
    fflush(stderr);
    /* The returned graph is not kept here: memory held in binKeeper.
     * Free later. */
    agFromGp(db, gp, conn, 5, out);
    if (memTest != TRUE)
        gp = gp->next;
    else
        ++count;   /* memTest stays on the same genePred; count bounds the repeats */
    }
carefulClose(&out);
genePredFreeList(&gpList);
hFreeConn(&conn);
}
void oneChromInput(char *database, char *chrom, int chromSize,
        char *rangeTrack, char *expTrack,
        struct hash *refLinkHash, struct hash *erHash, FILE *f)
/* Read in info for one chromosome.
 *   database, chrom - passed to loadBed()/loadGenePred() to select the data
 *   chromSize       - upper bound of the three binKeepers built here
 *   rangeTrack      - track loaded as the range beds (12 fields requested)
 *   expTrack        - track loaded as the expression beds (15 fields requested)
 *   refLinkHash     - passed through to findCommonName()
 *   erHash          - passed through to outputAveraged()
 *   f               - output stream passed to outputAveraged()
 * A rangeInfo is built per range.  A range is marked split when an
 * expression bed overlaps more than one range.  Each non-split range with
 * at least one overlapping expression bed is passed to outputAveraged().
 * Everything allocated here is released before returning. */
{
struct binKeeper *rangeBk = binKeeperNew(0, chromSize);
struct binKeeper *expBk = binKeeperNew(0, chromSize);
struct binKeeper *knownBk = binKeeperNew(0, chromSize);
struct bed *rangeList = NULL, *range;
struct bed *expList = NULL;
struct genePred *knownList = NULL;
struct rangeInfo *riList = NULL, *ri;
struct hash *riHash = hashNew(0); /* rangeInfo values. */
struct binElement *rangeBeList = NULL, *rangeBe, *beList = NULL, *be;

/* Load up data from database. */
rangeList = loadBed(database, chrom, rangeTrack, 12, rangeBk);
expList = loadBed(database, chrom, expTrack, 15, expBk);
knownList = loadGenePred(database, chrom, "refGene", knownBk);

/* Build range info basics. */
rangeBeList = binKeeperFindAll(rangeBk);
for (rangeBe = rangeBeList; rangeBe != NULL; rangeBe = rangeBe->next)
    {
    range = rangeBe->val;
    AllocVar(ri);
    slAddHead(&riList, ri);
    hashAddSaveName(riHash, range->name, ri, &ri->id);
    ri->range = range;
    ri->commonName = findCommonName(range, knownBk, refLinkHash);
    }
slReverse(&riList);

/* Mark split ones. */
beList = binKeeperFindAll(expBk);
for (be = beList; be != NULL; be = be->next)
    {
    struct bed *exp = be->val;
    struct binElement *subList = binKeeperFind(rangeBk,
        exp->chromStart, exp->chromEnd);
    if (slCount(subList) > 1)
        {
        struct binElement *sub;
        for (sub = subList; sub != NULL; sub = sub->next)
            {
            struct bed *subRange = sub->val;
            struct rangeInfo *subRi = hashMustFindVal(riHash, subRange->name);
            subRi->isSplit = TRUE;
            }
        }
    slFreeList(&subList);
    }
/* Release the full expression element list now.  The output loop below
 * runs its own queries and must not overwrite this pointer before it has
 * been freed. */
slFreeList(&beList);

/* Output the nice ones: not split and having some expression info. */
for (ri = riList; ri != NULL; ri = ri->next)
    {
    if (!ri->isSplit)
        {
        struct bed *riRange = ri->range;
        struct binElement *expBeList = binKeeperFind(expBk,
            riRange->chromStart, riRange->chromEnd);
        if (expBeList != NULL)
            outputAveraged(f, ri, erHash, expBeList);
        slFreeList(&expBeList);
        }
    }

/* Clean up time! */
freeHash(&riHash);
genePredFreeList(&knownList);
/* rangeList and expList are lists, so free every element, not just the head. */
bedFreeList(&rangeList);
bedFreeList(&expList);
slFreeList(&rangeBeList);
slFreeList(&riList);
binKeeperFree(&rangeBk);
binKeeperFree(&expBk);
binKeeperFree(&knownBk);
}