void liftAcross(char *liftAcross, char *srcFile, char *dstOut)
/* liftAcross - convert one coordinate system to another, no overlapping items.
 *   liftAcross - lift specification file, read into a hash by readLift()
 *   srcFile    - input file loaded with genePredExtLoadAll()
 *   dstOut     - output file, written with genePredTabOut()
 * An item whose chrom has an entry in the lift hash is passed through
 * liftGenePred() and every item that comes back is written; an item with
 * no entry is written unchanged. */
{
struct hash *lftHash = readLift(liftAcross);
struct genePred *gpList = genePredExtLoadAll(srcFile);
struct genePred *gp = NULL;
FILE *out = mustOpen(dstOut, "w");
int genePredItemCount = 0;

if (bedOut)
    bedRegionOutput(lftHash);

for (gp = gpList; gp != NULL; gp = gp->next)
    {
    struct liftSpec *lsFound = hashFindVal(lftHash, gp->chrom);
    if (lsFound)
        {
        /* liftGenePred() hands back a list that is owned here. */
        struct genePred *gpLifted = liftGenePred(gp, lsFound);
        struct genePred *gpl;
        for (gpl = gpLifted; gpl != NULL; gpl = gpl->next)
            genePredTabOut(gpl, out);
        genePredFreeList(&gpLifted);
        }
    else
        {
        /* No lift spec for this chrom: pass the item through untouched. */
        genePredTabOut(gp, out);
        }
    ++genePredItemCount;
    }

/* Close the output here rather than leaving the handle open until the
 * process exits. */
carefulClose(&out);

/* lftHash and gpList are left allocated to disappear at exit */
verbose(2,"#\tgene pred item count: %d\n", genePredItemCount);
}
int palOutPredsInBeds(struct sqlConnection *conn, struct cart *cart,
                      struct bed *beds, char *table )
/* Output the alignments whose names and coords match a bed.
 * For every bed in the list, query table for genePreds with the same name
 * and chrom that satisfy the txEnd/txStart range condition below, gather
 * them all into one list (kept in the order they were read), and pass that
 * list to palOutPredList().  Returns palOutPredList()'s result, or 0 when
 * nothing matched. */
{
struct genePred *matches = NULL;
struct bed *bed;
int outCount;

for (bed = beds; bed != NULL; bed = bed->next)
    {
    char where[10 * 1024];
    struct genePredReader *reader;
    struct genePred *pred;

    sqlSafefFrag(where, sizeof(where),
        "name = '%s' and chrom='%s' and txEnd > %d and txStart <= %d",
        bed->name, bed->chrom, bed->chromStart, bed->chromEnd);

    reader = genePredReaderQuery( conn, table, where);
    for (pred = genePredReaderNext(reader); pred != NULL;
         pred = genePredReaderNext(reader))
        {
        slAddHead(&matches, pred);
        }
    genePredReaderFree(&reader);
    }

if (matches == NULL)
    return 0;

/* slAddHead built the list back to front; restore read order. */
slReverse(&matches);
outCount = palOutPredList( conn, cart, matches);
genePredFreeList(&matches);
return outCount;
}
static void getGeneAnns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* Get requested genePred annotations from the database.
 * Loads every row of refGene, sorts the list with genePredNameCmp, and
 * feeds each entry to processGenePred() together with the output file
 * handle and refSeqVerInfoTbl. */
{
struct genePred *annList = genePredReaderLoadQuery(conn, "refGene", NULL);
struct genePred *ann;
FILE *outFh;

slSort(&annList, genePredNameCmp);
outFh = mustOpen(outFile, "w");

ann = annList;
while (ann != NULL)
    {
    processGenePred(outFh, refSeqVerInfoTbl, ann);
    ann = ann->next;
    }

carefulClose(&outFh);
genePredFreeList(&annList);
}
void hgLoadGenePred(char *db, char *table, int numGenePreds, char **genePredFiles)
/* hgLoadGenePred - Load up a mySQL database genePred table.
 *   db            - database to connect to
 *   table         - name of the table to set up and load
 *   numGenePreds  - number of entries in genePredFiles
 *   genePredFiles - input files passed to loadGenes()
 * The genes are written to a tab file in the current directory, the table
 * is set up, the tab file is loaded, and the tab file is then removed. */
{
struct genePred *geneList = loadGenes(numGenePreds, genePredFiles);
struct sqlConnection *dbConn = sqlConnect(db);
char *tabDir = ".";
FILE *tabFile = hgCreateTabFile(tabDir, table);

/* The in-memory genes are not needed once the tab file has been written. */
mkTabFile(db, geneList, tabFile);
genePredFreeList(&geneList);

setupTable(db, dbConn, table);
hgLoadTabFile(dbConn, tabDir, table, &tabFile);

sqlDisconnect(&dbConn);
hgRemoveTabFile(tabDir, table);
}
static void createCcdsGene(struct sqlConnection *conn, char *ccdsGeneFile, struct genomeInfo *genome, struct hash* ignoreTbl, struct hash *gotCcds) /* create the ccdsGene tab file from the ccds database */ { struct ccdsLocationsJoin *locs = loadLocations(conn, genome, ignoreTbl, gotCcds); struct genePred *gp, *genes = buildCcdsGene(&locs); FILE *genesFh; genesFh = mustOpen(ccdsGeneFile, "w"); for (gp = genes; gp != NULL; gp = gp->next) { if (loadDb) fprintf(genesFh, "%d\t", binFromRange(gp->txStart, gp->txEnd)); genePredTabOut(gp, genesFh); } carefulClose(&genesFh); genePredFreeList(&genes); }
void createAltSplices(char *db, char *outFile, boolean memTest)
/* Top level routine, gets genePredictions and runs through them to
 * build altSplice graphs.
 *   db      - database used for the connection and passed to agFromGp()
 *   outFile - file opened for writing and handed to agFromGp()
 *   memTest - when TRUE the main loop never advances, so agFromGp() is
 *             called on the first genePred over and over without end
 * Target loci come from the "beds" option or, failing that, the
 * "genePreds" option; with neither, usage() is called.  All loci must be
 * on a single chromosome. */
{
struct genePred *gp = NULL, *gpList = NULL;
FILE *out = NULL;
struct sqlConnection *conn = hAllocConn(db);
char *gpFile = NULL;
char *bedFile = NULL;
int count = 0;

/* Figure out where to get coordinates from. */
bedFile = optionVal("beds", NULL);
gpFile = optionVal("genePreds", NULL);
if (bedFile != NULL)
    gpList = convertBedsToGps(bedFile);
else if (gpFile != NULL)
    gpList = genePredLoadAll(gpFile);
else
    {
    warn("Must specify target loci as either a bed file or a genePred file");
    usage();
    }

/* Sanity check to make sure we got some loci to work with.  Done before
 * any other use of gpList so later code never sees an empty list. */
if (gpList == NULL)
    errAbort("No gene boundaries were found.");
if (!gpAllSameChrom(gpList))
    errAbort("Multiple chromosomes in bed or genePred file.");

slSort(&gpList, genePredCmp);
setupTables(gpList->chrom);

/* If local memory get things going here. */
if (optionExists("localMem"))
    {
    warn("Using local memory. Setting up caches...");
    useChromKeeper = TRUE;
    setupChromKeeper(conn, optionVal("db", NULL), gpList->chrom);
    if (!optionExists("skipTissues"))
        {
        if (optionExists("tissueLibFile"))
            readTissueLibraryIntoCache(optionVal("tissueLibFile", NULL));
        else
            setupTissueLibraryCache(conn);
        }
    warn("Done setting up local caches.");
    }
else /* Have to set up agxSeen binKeeper based on genePreds. */
    {
    int maxPos = 0;
    int minPos = BIGNUM;
    for (gp = gpList; gp != NULL; gp = gp->next)
        {
        maxPos = max(maxPos, gp->txEnd);
        minPos = min(minPos, gp->txStart);
        }
    /* Pad the covered range by 10000 on each side, clamped to [0, BIGNUM]. */
    agxSeenBin = binKeeperNew(max(0, minPos-10000), min(BIGNUM,maxPos+10000));
    }

dotForUserInit(max(slCount(gpList)/10, 1));
out = mustOpen(outFile, "w");

/* NOTE(review): count is never incremented, so "count < 5" is always true
 * and the loop ends only when gp runs off the end of the list.  Left as is
 * to preserve behavior. */
for (gp = gpList; gp != NULL && count < 5; )
    {
    dotForUser();
    fflush(stderr);
    /* Return value is not used here: memory held in binKeeper. Free later. */
    agFromGp(db, gp, conn, 5, out);
    if (memTest != TRUE)
        gp = gp->next;
    }

/* Close the output file; previously the handle was left open. */
carefulClose(&out);
genePredFreeList(&gpList);
hFreeConn(&conn);
/* uglyf("%d genePredictions with %d clusters, %d cassette exons, %d of are not mod 3.\n", */
/*       slCount(gpList), clusterCount, cassetteCount, misSense); */
}
void oneChromInput(char *database, char *chrom, int chromSize, char *rangeTrack, char *expTrack, struct hash *refLinkHash, struct hash *erHash, FILE *f) /* Read in info for one chromosome. */ { struct binKeeper *rangeBk = binKeeperNew(0, chromSize); struct binKeeper *expBk = binKeeperNew(0, chromSize); struct binKeeper *knownBk = binKeeperNew(0, chromSize); struct bed *rangeList = NULL, *range; struct bed *expList = NULL; struct genePred *knownList = NULL; struct rangeInfo *riList = NULL, *ri; struct hash *riHash = hashNew(0); /* rangeInfo values. */ struct binElement *rangeBeList = NULL, *rangeBe, *beList = NULL, *be; /* Load up data from database. */ rangeList = loadBed(database, chrom, rangeTrack, 12, rangeBk); expList = loadBed(database, chrom, expTrack, 15, expBk); knownList = loadGenePred(database, chrom, "refGene", knownBk); /* Build range info basics. */ rangeBeList = binKeeperFindAll(rangeBk); for (rangeBe = rangeBeList; rangeBe != NULL; rangeBe = rangeBe->next) { range = rangeBe->val; AllocVar(ri); slAddHead(&riList, ri); hashAddSaveName(riHash, range->name, ri, &ri->id); ri->range = range; ri->commonName = findCommonName(range, knownBk, refLinkHash); } slReverse(&riList); /* Mark split ones. */ beList = binKeeperFindAll(expBk); for (be = beList; be != NULL; be = be->next) { struct bed *exp = be->val; struct binElement *subList = binKeeperFind(rangeBk, exp->chromStart, exp->chromEnd); if (slCount(subList) > 1) { struct binElement *sub; for (sub = subList; sub != NULL; sub = sub->next) { struct bed *range = sub->val; struct rangeInfo *ri = hashMustFindVal(riHash, range->name); ri->isSplit = TRUE; } } slFreeList(&subList); } /* Output the nice ones: not split and having some expression info. */ for (ri = riList; ri != NULL; ri = ri->next) { if (!ri->isSplit) { struct bed *range = ri->range; beList = binKeeperFind(expBk, range->chromStart, range->chromEnd); if (beList != NULL) outputAveraged(f, ri, erHash, beList); slFreeList(&beList); } } /* Clean up time! 
*/ freeHash(&riHash); genePredFreeList(&knownList); bedFree(&rangeList); bedFree(&expList); slFreeList(&rangeBeList); slFreeList(&beList); slFreeList(&riList); binKeeperFree(&rangeBk); binKeeperFree(&expBk); binKeeperFree(&knownBk); }