void doOneChrom(char *database, char *chrom, char *rnaTable, char *expTable, FILE *f) /* Process one chromosome. */ { int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; struct bed *exp, *rna; int rowOffset; struct binElement *be, *beList; int oneCount; /* Load up expTable into bin-keeper. */ sr = hChromQuery(conn, expTable, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { exp = bedLoadN(row + rowOffset, 12); binKeeperAdd(bk, exp->chromStart, exp->chromEnd, exp); } sqlFreeResult(&sr); /* Loop through rnaTable and look at intersections. */ sr = hChromQuery(conn, rnaTable, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { rna = bedLoadN(row + rowOffset, 12); beList = binKeeperFind(bk, rna->chromStart, rna->chromEnd); oneCount = 0; for (be = beList; be != NULL; be = be->next) { exp = be->val; if (exp->strand[0] == rna->strand[0]) { ++oneCount; ++hitCount; // fprintf(f, "%s:%d-%d\t%s\t%s\n", // rna->chrom, rna->chromStart, rna->chromEnd, rna->name, exp->name); } } slFreeList(&beList); if (oneCount == 0) { ++missCount; fprintf(f, "miss %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); } else if (oneCount == 1) { fprintf(f, "uniq %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); ++uniqCount; } else { fprintf(f, "dupe %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); ++dupeCount; } } sqlFreeResult(&sr); hFreeConn(&conn); }
void getBinKeeper(char *chromName) /* put SNPs in binKeeper */ { char query[512]; struct sqlConnection *conn = hAllocConn(); struct sqlResult *sr; char **row; int start = 0; int end = 0; char *rsId = NULL; int chromSize = hChromSize(chromName); verbose(1, "constructing binKeeper...\n"); snps = binKeeperNew(0, chromSize); safef(query, sizeof(query), "select chromStart, chromEnd, name from %s where chrom = '%s'", snpTable, chromName); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { start = sqlUnsigned(row[0]); end = sqlUnsigned(row[1]); rsId = cloneString(row[2]); binKeeperAdd(snps, start, end, rsId); } sqlFreeResult(&sr); hFreeConn(&conn); }
struct hash *readChainToBinKeeper(char *sizeFileName, char *fileName) { struct binKeeper *bk; struct chain *chain; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct lineFile *sf = lineFileOpen(sizeFileName, TRUE); struct hash *hash = newHash(0); char *chromRow[2]; while (lineFileRow(sf, chromRow)) { char *name = chromRow[0]; int size = lineFileNeedNum(sf, chromRow, 1); if (hashLookup(hash, name) != NULL) warn("Duplicate %s, ignoring all but first\n", name); else { bk = binKeeperNew(0, size); assert(size > 1); hashAdd(hash, name, bk); } } while ((chain = chainRead(lf)) != NULL) { bk = hashMustFindVal(hash, chain->tName); binKeeperAdd(bk, chain->tStart, chain->tEnd, chain); } lineFileClose(&lf); return hash; }
struct binKeeper *fbToBinKeeper(struct featureBits *fbList, int chromSize)
/* Build a binKeeper spanning 0..chromSize and add every element of fbList to
 * it over start..end.  The stored values are the list elements themselves,
 * not copies. */
{
struct binKeeper *keeper = binKeeperNew(0, chromSize);
struct featureBits *cur = fbList;

while (cur != NULL)
    {
    binKeeperAdd(keeper, cur->start, cur->end, cur);
    cur = cur->next;
    }
return keeper;
}
void loadPslsFromFile(char *pslFile, char *chrom, struct sqlConnection *conn) /** Load the psls from the directed file (instead of the database. */ { struct psl *psl = NULL, *pslNext = NULL, *pslList = NULL; pslList = pslLoadAll(pslFile); for(psl = pslList; psl != NULL; psl = psl->next) { minPslStart = min(psl->tStart, minPslStart); maxPslEnd = max(psl->tEnd, maxPslEnd); } chromPslBin = binKeeperNew(minPslStart, maxPslEnd); agxSeenBin = binKeeperNew(minPslStart, maxPslEnd); for(psl = pslList; psl != NULL; psl = pslNext) { pslNext = psl->next; if(sameString(psl->tName, chrom)) binKeeperAdd(chromPslBin, psl->tStart, psl->tEnd, psl); else pslFree(&psl); } }
struct hash *readBed(char *fileName) /* Read in bed file into hash of binKeepers keyed by * target. */ { struct lineFile *lf = NULL; struct hash *hash = newHash(0); char *row[3]; struct chromInfo *ciList = NULL, *ci; int count = 0, chromCount = 0; /* Make first pass through just figuring out maximum size * of each chromosome info. */ lf = lineFileOpen(fileName, TRUE); while (lineFileRow(lf, row)) { char *chrom = row[0]; int e = lineFileNeedNum(lf, row, 2); ci = hashFindVal(hash, chrom); if (ci == NULL) { AllocVar(ci); hashAddSaveName(hash, chrom, ci, &ci->name); slAddHead(&ciList, ci); ++chromCount; } if (e > ci->maxEnd) ci->maxEnd = e; ++count; } lineFileClose(&lf); /* Allocate binKeeper on each chromosome. */ for (ci = ciList; ci != NULL; ci = ci->next) { ci->bk = binKeeperNew(0, ci->maxEnd); } /* Make second pass filling in binKeeper */ lf = lineFileOpen(fileName, TRUE); while (lineFileRow(lf, row)) { char *chrom = row[0]; int s = lineFileNeedNum(lf, row, 1); int e = lineFileNeedNum(lf, row, 2); ci = hashMustFindVal(hash, chrom); binKeeperAdd(ci->bk, s, e, NULL); } lineFileClose(&lf); printf("Read %d items in %d target chromosomes from %s\n", count, chromCount, fileName); return hash; }
struct binKeeper *getChromBins(struct hash *chromHash, char *chrom, char *strand)
/* Return the binKeeper stored in chromHash under the key formed by
 * concatenating chrom and strand.  If there is no such entry yet, a new
 * binKeeper spanning 0..511*1024*1024 is created, stored and returned. */
{
char key[64];
struct hashEl *found;

safef(key, sizeof(key), "%s%s", chrom, strand);
found = hashLookup(chromHash, key);
if (found != NULL)
    return found->val;

/* First request for this chrom/strand pair. */
found = hashAdd(chromHash, key, binKeeperNew(0, 511*1024*1024));
return found->val;
}
struct hash *minChromSizeKeeperHash(struct hash *sizeHash)
/* Return a hash full of binKeepers that match the input sizeHash,
 * (which generally is the output of minChromSizeFromBeds).
 * There is one binKeeper per entry of sizeHash, spanning 0..minSize of that
 * entry, stored under the same key the entry has in sizeHash. */
{
struct hashEl *el, *list = hashElListHash(sizeHash);
struct hash *keeperHash = hashNew(16);

for (el = list; el != NULL; el = el->next)
    {
    struct minChromSize *chrom = el->val;
    struct binKeeper *bk = binKeeperNew(0, chrom->minSize);

    /* Key by the hash element's own name.  That is the key the size was
     * stored under, so the two hashes always agree, and it does not depend
     * on which field of minChromSize carries the name. */
    hashAdd(keeperHash, el->name, bk);
    }
hashElFreeList(&list);
return keeperHash;
}
void loadPslsFromDatabase(struct sqlConnection *conn, char *db, char *chrom) /** Load all of the desired alignments into the chromkeeper structure from the desired pslTables. */ { int i = 0; struct sqlResult *sr = NULL; char **row = NULL; int rowOffset = 0; struct psl *pslList = NULL, *psl = NULL; for(i = 0; i < numDbTables; i++) { sr = hChromQuery(conn, dbTables[i], chrom, NULL, &rowOffset); while((row = sqlNextRow(sr)) != NULL) { psl = pslLoad(row+rowOffset); slAddHead(&pslList, psl); minPslStart = min(psl->tStart, minPslStart); maxPslEnd = max(psl->tEnd, maxPslEnd); /* This just adds the mrna twice to the list, cheat way to add more weight to certain tables. */ if(weightMrna && (stringIn("refSeqAli", dbTables[i]) || stringIn("mrna", dbTables[i]))) { psl = clonePsl(psl); slAddHead(&pslList, psl); } } sqlFreeResult(&sr); } chromPslBin = binKeeperNew(minPslStart, maxPslEnd); agxSeenBin = binKeeperNew(minPslStart, maxPslEnd); for(psl = pslList; psl != NULL; psl = psl->next) { binKeeperAdd(chromPslBin, psl->tStart, psl->tEnd, psl); } }
struct hash *keepersForChroms(struct sqlConnection *conn)
/* Create hash of binKeepers keyed by chromosome.  One binKeeper spanning
 * 0..size is made for each (chrom, size) row of the chromInfo table. */
{
struct hash *keeperHash = hashNew(0);
struct sqlResult *sr = sqlGetResult(conn, NOSQLINJ "select chrom,size from chromInfo");
char **row;

for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    int chromSize = sqlUnsigned(row[1]);
    struct binKeeper *keeper = binKeeperNew(0, chromSize);

    hashAdd(keeperHash, row[0], keeper);
    }
sqlFreeResult(&sr);
return keeperHash;
}
struct mouseChromCache *newMouseChromCache(char *chrom, int chromSize, char *ratMouseDir) /* Create a new chromCache. */ { struct mouseChromCache *mcc; char fileName[512]; struct lineFile *lf; char *row[3]; int start,end; long long *pPos; /* Open up file with actual alignments. Warn and return NULL * if it doesn't exist. */ sprintf(fileName, "%s/%s.axt", ratMouseDir, chrom); lf = lineFileMayOpen(fileName, TRUE); /* Allocate structure and store basic info in it. */ AllocVar(mcc); mcc->name = cloneString(chrom); mcc->size = chromSize; mcc->lf = lf; if (lf == NULL) { warn("%s doesn't exist", fileName); if (!noDieMissing) noWarnAbort(); return mcc; } /* Read index file into bk. */ sprintf(fileName, "%s/%s.axt.ix", ratMouseDir, chrom); mcc->bk = binKeeperNew(0, chromSize); lf = lineFileOpen(fileName, TRUE); verbose(1, "Reading %s\n", fileName); while (lineFileRow(lf, row)) { start = lineFileNeedNum(lf, row, 0); end = lineFileNeedNum(lf, row, 1) + start; AllocVar(pPos); *pPos = atoll(row[2]); binKeeperAdd(mcc->bk, start, end, pPos); } lineFileClose(&lf); /* Return initialized object. */ return mcc; }
struct binKeeper *loadAxtsIntoRange(char *fileName, char *tPrefix, char *qPrefix)
/* Read every axt in fileName into a new binKeeper spanning 0..maxChromSize,
 * each stored over its tStart..tEnd.  The number loaded is reported through
 * uglyf().  tPrefix and qPrefix are not used here. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct binKeeper *keeper = binKeeperNew(0, maxChromSize);
struct axt *axt;
int loaded = 0;

for (axt = axtRead(lf); axt != NULL; axt = axtRead(lf))
    {
    binKeeperAdd(keeper, axt->tStart, axt->tEnd, axt);
    loaded += 1;
    }
uglyf("LOaded %d from %s\n", loaded, fileName);
lineFileClose(&lf);
return keeper;
}
void chromKeeperInit(char *db) /* Initialize the chromKeeper to a given database (hg15,mm2, etc). */ { struct slName *names = NULL, *name = NULL; int count=0; names = hAllChromNames(db); chromCount = slCount(names); assert(chromNames == NULL && chromRanges == NULL); AllocArray(chromNames, chromCount); AllocArray(chromRanges, chromCount); for(name=names; name != NULL; name = name->next) { int size = hChromSize(db, name->name); chromRanges[count] = binKeeperNew(0,size); chromNames[count] = cloneString(name->name); count++; } slFreeList(&names); }
void chromKeeperInitChroms(struct slName *nameList, int maxChromSize)
/* Initialize a chrom keeper with a list of names and a size that
 * will be used for each one.
 * Sets chromCount; when the list is empty nothing else happens.  Otherwise
 * chromNames and chromRanges (asserted to be NULL beforehand) are allocated
 * and filled in list order, every binKeeper spanning 0..maxChromSize. */
{
struct slName *el = nameList;
int ix = 0;

chromCount = slCount(nameList);
if (chromCount == 0)
    return;

assert(chromNames == NULL && chromRanges == NULL);
AllocArray(chromNames, chromCount);
AllocArray(chromRanges, chromCount);

while (el != NULL)
    {
    chromRanges[ix] = binKeeperNew(0, maxChromSize);
    chromNames[ix] = cloneString(el->name);
    ++ix;
    el = el->next;
    }
}
struct hash *readBed(char *fileName) /* Read bed and return it as a hash keyed by chromName * with binKeeper values. */ { char *row[5]; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct hash *hash = newHash(0); int expectedCols = bScore ? 5 : 3; while (lineFileNextRow(lf, row, expectedCols)) { struct binKeeper *bk; struct bed5 *bed; struct hashEl *hel = hashLookup(hash, row[0]); if (hel == NULL) { bk = binKeeperNew(0, 1024*1024*1024); hel = hashAdd(hash, row[0], bk); } bk = hel->val; AllocVar(bed); bed->chrom = hel->name; bed->start = lineFileNeedNum(lf, row, 1); bed->end = lineFileNeedNum(lf, row, 2); if (bScore) bed->score = lineFileNeedNum(lf, row, 4); if (bed->start > bed->end) errAbort("start after end line %d of %s", lf->lineIx, lf->fileName); if (bed->start == bed->end) { if (allowStartEqualEnd) // Note we are tweaking binKeeper coords here, so use bed->start and bed->end. binKeeperAdd(bk, max(0, bed->start-1), bed->end+1, bed); else lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)"); } else binKeeperAdd(bk, bed->start, bed->end, bed); } lineFileClose(&lf); return hash; }
struct hash *netToBkHash(char *netFile) /* Read net file into a hash full of binKeepers keyed by chromosome. * The binKeepers are full of nets. */ { struct hash *netHash = hashNew(0); struct lineFile *lf = lineFileOpen(netFile, TRUE); struct chainNet *net, *netList = chainNetRead(lf); for (net = netList; net != NULL; net = net->next) { if (hashLookup(netHash, net->name)) errAbort("%s has multiple %s records", netFile, net->name); struct binKeeper *bk = binKeeperNew(0, net->size); hashAdd(netHash, net->name, bk); struct cnFill *fill; for(fill=net->fillList; fill != NULL; fill = fill->next) binKeeperAdd(bk, fill->tStart, fill->tStart+fill->tSize, fill); } lineFileClose(&lf); return netHash; }
struct binKeeper *readRepeats2(char *chrom, char *rmskFileName, struct hash *tSizeHash) /* read all repeats for a chromosome of size size, returns results in binKeeper structure for fast query*/ { boolean rmskRet; struct lineFile *rmskF = NULL; struct rmskOut2 *rmsk; struct binKeeper *bk; int size; size = hashIntVal(tSizeHash, chrom); bk = binKeeperNew(0, size); assert(size > 1); rmskOut2OpenVerify(rmskFileName ,&rmskF , &rmskRet); while ((rmsk = rmskOut2ReadNext(rmskF)) != NULL) { binKeeperAdd(bk, rmsk->genoStart, rmsk->genoEnd, rmsk); } lineFileClose(&rmskF); return bk; }
struct hash *bedsIntoHashOfKeepers(struct bed *bedList) /* Return a hash full of binKeepers, keyed by chromosome (or contig) * that contains the bedList */ { struct hash *sizeHash = chromMinSizeHash(bedList); struct hash *keeperHash = hashNew(16); struct bed *bed; for (bed = bedList; bed != NULL; bed = bed->next) { struct binKeeper *keeper = hashFindVal(keeperHash, bed->chrom); if (keeper == NULL) { struct minChromSize *chrom = hashMustFindVal(sizeHash, bed->chrom); keeper = binKeeperNew(0, chrom->minSize); hashAdd(keeperHash, chrom->name, keeper); } binKeeperAdd(keeper, bed->chromStart, bed->chromEnd, bed); } hashFree(&sizeHash); return keeperHash; }
struct hash *txgIntoKeeperHash(struct txGraph *txgList) /* Create a hash full of bin keepers (one for each chromosome or contig. * The binKeepers are full of txGraphs. */ { struct hash *sizeHash = txgChromMinSizeHash(txgList); struct hash *bkHash = hashNew(16); struct txGraph *txg; for (txg = txgList; txg != NULL; txg = txg->next) { struct binKeeper *bk = hashFindVal(bkHash, txg->tName); if (bk == NULL) { struct minChromSize *chrom = hashMustFindVal(sizeHash, txg->tName); verbose(3, "New binKeeper for %s\n", txg->tName); bk = binKeeperNew(0, chrom->minSize); hashAdd(bkHash, txg->tName, bk); } binKeeperAdd(bk, txg->tStart, txg->tEnd, txg); } hashFree(&sizeHash); return bkHash; }
struct hash *readLiftOverMapChainHash(char *fileName) /* taken from kent/src/hg/lib/liftOver.c */ /* Read map file into hashes. */ { struct hash *chainHash = hashNew(10); struct lineFile *lf = lineFileOpen(fileName, TRUE); struct chain *chain; struct liftOverChromMap *map; while ((chain = chainRead(lf)) != NULL) { if ((map = hashFindVal(chainHash, chain->tName)) == NULL) { AllocVar(map); map->bk = binKeeperNew(0, chain->tSize); hashAddSaveName(chainHash, chain->tName, map, &map->name); } binKeeperAdd(map->bk, chain->tStart, chain->tEnd, chain); } lineFileClose(&lf); return chainHash; }
struct hash *readRepeatsAll2(char *sizeFileName, char *rmskDir) /* read all repeats for a all chromosomes getting sizes from sizeFileNmae , returns results in hash of binKeeper structure for fast query*/ { boolean rmskRet; struct binKeeper *bk; struct lineFile *rmskF = NULL; struct rmskOut2 *rmsk; struct lineFile *lf = lineFileOpen(sizeFileName, TRUE); struct hash *hash = newHash(0); char *row[2]; char rmskFileName[256]; while (lineFileRow(lf, row)) { char *name = row[0]; int size = lineFileNeedNum(lf, row, 1); if (hashLookup(hash, name) != NULL) warn("Duplicate %s, ignoring all but first\n", name); else { bk = binKeeperNew(0, size); assert(size > 1); safef(rmskFileName, sizeof(rmskFileName), "%s/%s.fa.out",rmskDir,name); rmskOut2OpenVerify(rmskFileName ,&rmskF , &rmskRet); while ((rmsk = rmskOut2ReadNext(rmskF)) != NULL) { binKeeperAdd(bk, rmsk->genoStart, rmsk->genoEnd, rmsk); } lineFileClose(&rmskF); hashAdd(hash, name, bk); } } lineFileClose(&lf); return hash; }
void sortGenes(struct sqlConnection *conn) /* Put up sort gene page. */ { cartWebStart(cart, database, "Finding Candidate Genes for Gene Sorter"); if (!hgNearOk(database)) errAbort("Sorry, gene sorter not available for this database."); /* Get list of regions. */ struct genoGraph *gg = ggFirstVisible(); double threshold = getThreshold(); struct bed3 *bed, *bedList = regionsOverThreshold(gg); /* Figure out what table and column are the sorter's main gene set. */ struct hash *genomeRa = hgReadRa(genome, database, "hgNearData", "genome.ra", NULL); char *geneTable = hashMustFindVal(genomeRa, "geneTable"); char *idColumn = hashMustFindVal(genomeRa, "idColumn"); /* if marker labels were present when the file was uploaded, they are saved here */ char cgmName[256]; safef(cgmName, sizeof(cgmName), "%s.cgm", gg->binFileName); struct lineFile *m = lineFileMayOpen(cgmName, TRUE); char *cgmRow[4]; cgmRow[0] = ""; /* dummy row */ cgmRow[1] = ""; cgmRow[2] = "0"; cgmRow[3] = "0"; FILE *g = NULL; int markerCount = 0; struct tempName snpTn; if (m) { /* Create custom column output file. */ trashDirFile(&snpTn, "hgg", "marker", ".mrk"); g = mustOpen(snpTn.forCgi, "w"); fprintf(g, "column name=\"%s Markers\" shortLabel=\"%s Markers over threshold\" longLabel=\"%s Markers in regions over threshold\" " "visibility=on priority=99 " "\n" , gg->shortLabel , gg->shortLabel , gg->shortLabel ); } /*** Build up hash of all transcriptHash that are in region. */ struct hash *transcriptHash = hashNew(16); /* This loop handles one chromosome at a time. It depends on * the bedList being sorted by chromosome. */ for (bed = bedList; bed != NULL; ) { /* Make binKeeper and stuff in all regions in this chromosome into it. 
*/ char *chrom = bed->chrom; int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); while (bed != NULL && sameString(chrom, bed->chrom)) { binKeeperAdd(bk, bed->chromStart, bed->chromEnd, bed); bed = bed->next; } struct binKeeper *bkGenes = NULL; if (m) bkGenes = binKeeperNew(0, chromSize); /* Query database to find out bounds of all genes on this chromosome * and if they overlap any of the regions then put them in the hash. */ char query[512]; safef(query, sizeof(query), "select name,txStart,txEnd from %s where chrom='%s'", geneTable, chrom); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; int start = sqlUnsigned(row[1]); int end = sqlUnsigned(row[2]); if (binKeeperAnyOverlap(bk, start, end)) { hashStore(transcriptHash, name); if (m) binKeeperAdd(bkGenes, start, end, cloneString(name)); } } sqlFreeResult(&sr); if (m) { /* Read cgm file if it exists, looking at all markers on this chromosome * and if they overlap any of the regions and genes then output them. */ do { // marker, chrom, chromStart, val char *marker = cgmRow[0]; char *chr = cgmRow[1]; int start = sqlUnsigned(cgmRow[2]); int end = start+1; double val = sqlDouble(cgmRow[3]); int cmp = strcmp(chr,chrom); if (cmp > 0) break; if (cmp == 0) { if (val >= threshold) { struct binElement *el, *bkList = binKeeperFind(bkGenes, start, end); for (el = bkList; el; el=el->next) { /* output to custom column trash file */ fprintf(g, "%s %s\n", (char *)el->val, marker); } if (bkList) { ++markerCount; slFreeList(&bkList); } } } } while (lineFileRow(m, cgmRow)); } /* Clean up for this chromosome. */ binKeeperFree(&bk); if (m) { /* For speed, we do not free up the values (cloned the kg names earlier) */ binKeeperFree(&bkGenes); } } /* Get list of all transcripts in regions. */ struct hashEl *el, *list = hashElListHash(transcriptHash); /* Create file with all matching gene IDs. 
*/ struct tempName keyTn; trashDirFile(&keyTn, "hgg", "key", ".key"); FILE *f = mustOpen(keyTn.forCgi, "w"); for (el = list; el != NULL; el = el->next) fprintf(f, "%s\n", el->name); carefulClose(&f); /* Print out some info. */ hPrintf("Thresholding <i>%s</i> at %g. ", gg->shortLabel, threshold); hPrintf("There are %d regions covering %lld bases.<BR>\n", slCount(bedList), bedTotalSize((struct bed*)bedList) ); hPrintf("Installed a Gene Sorter filter that selects only genes in these regions.<BR>\n"); if (m) { hPrintf("There are %d markers in the regions over threshold that overlap knownGenes.<BR>\n", markerCount); hPrintf("Installed a Gene Sorter custom column called \"%s Markers\" with these markers.<BR>\n", gg->shortLabel); } /* close custom column output file */ if (m) { lineFileClose(&m); carefulClose(&g); } /* Stuff cart variable with name of file. */ char keyCartName[256]; safef(keyCartName, sizeof(keyCartName), "%s%s.keyFile", advFilterPrefix, idColumn); cartSetString(cart, keyCartName, keyTn.forCgi); cartSetString(cart, customFileVarName, snpTn.forCgi); char snpVisCartNameTemp[256]; char *snpVisCartName = NULL; safef(snpVisCartNameTemp, sizeof(snpVisCartNameTemp), "%s%s Markers.vis", colConfigPrefix, gg->shortLabel); snpVisCartName = replaceChars(snpVisCartNameTemp, " ", "_"); cartSetString(cart, snpVisCartName, "1"); freeMem(snpVisCartName); hPrintf("<FORM ACTION=\"../cgi-bin/hgNear\" METHOD=GET>\n"); cartSaveSession(cart); hPrintf("<CENTER>"); cgiMakeButton("submit", "go to gene sorter"); hPrintf("</CENTER>"); hPrintf("</FORM>"); cartWebEnd(); }
void createAltSplices(char *db, char *outFile, boolean memTest)
/* Top level routine, gets genePredictions and runs through them to
 * build altSplice graphs.
 *   db      - database to connect to and to pass on to agFromGp().
 *   outFile - file that agFromGp() writes to; created here.
 *   memTest - when TRUE the loop never advances past the first gene
 *             prediction, so that one locus is processed over and over
 *             and the routine does not return.
 * The target loci come from the "beds" or else the "genePreds" option and
 * must all be on one chromosome.  With the "localMem" option the alignments
 * (and, unless "skipTissues" is given, the tissue library) are cached in
 * memory first; otherwise only agxSeenBin is set up, over the span of the
 * loci padded by 10000 bases on each side. */
{
struct genePred *gp = NULL, *gpList = NULL;
FILE *out = NULL;
struct sqlConnection *conn = hAllocConn(db);
char *gpFile = NULL;
char *bedFile = NULL;
int count = 0;

/* Figure out where to get coordinates from. */
bedFile = optionVal("beds", NULL);
gpFile = optionVal("genePreds", NULL);
if (bedFile != NULL)
    gpList = convertBedsToGps(bedFile);
else if (gpFile != NULL)
    gpList = genePredLoadAll(gpFile);
else
    {
    warn("Must specify target loci as either a bed file or a genePred file");
    usage();
    }

/* Sanity check to make sure we got some loci to work with.  This comes
 * first so that nothing below is handed an empty list. */
if (gpList == NULL)
    errAbort("No gene boundaries were found.");
if (!gpAllSameChrom(gpList))
    errAbort("Multiple chromosomes in bed or genePred file.");
slSort(&gpList, genePredCmp);
setupTables(gpList->chrom);

/* If local memory get things going here. */
if (optionExists("localMem"))
    {
    warn("Using local memory. Setting up caches...");
    useChromKeeper = TRUE;
    setupChromKeeper(conn, optionVal("db", NULL), gpList->chrom);
    if (!optionExists("skipTissues"))
        {
        if (optionExists("tissueLibFile"))
            readTissueLibraryIntoCache(optionVal("tissueLibFile", NULL));
        else
            setupTissueLibraryCache(conn);
        }
    warn("Done setting up local caches.");
    }
else /* Have to set up agxSeen binKeeper based on genePreds. */
    {
    int maxPos = 0;
    int minPos = BIGNUM;
    for (gp = gpList; gp != NULL; gp = gp->next)
        {
        maxPos = max(maxPos, gp->txEnd);
        minPos = min(minPos, gp->txStart);
        }
    agxSeenBin = binKeeperNew(max(0, minPos-10000), min(BIGNUM,maxPos+10000));
    }

dotForUserInit(max(slCount(gpList)/10, 1));
out = mustOpen(outFile, "w");

/* NOTE(review): count is never changed, so "count < 5" is always true and
 * the loop ends only when the list is exhausted. */
for (gp = gpList; gp != NULL && count < 5; )
    {
    dotForUser();
    fflush(stderr);
    /* The returned graph is not used here; its memory is held in
     * binKeeper.  Free later. */
    agFromGp(db, gp, conn, 5, out);
    if (memTest != TRUE)
        gp = gp->next;
    }

/* Close the output so that it is flushed and write errors are noticed. */
carefulClose(&out);
genePredFreeList(&gpList);
hFreeConn(&conn);
}
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack, struct hash *otherHash, struct stats *stats) /* Process one chromosome. */ { struct bed *bedList = NULL, *bed; struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; int rowOffset; int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); struct psl *pslList = NULL; struct dnaSeq *chromSeq = NULL; if (endsWith(bedTrack, ".bed")) { struct lineFile *lf = lineFileOpen(bedTrack, TRUE); char *row[3]; while (lineFileRow(lf, row)) { if (sameString(chrom, row[0])) { bed = bedLoad3(row); slAddHead(&bedList, bed); } } lineFileClose(&lf); } else { sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { bed = bedLoad3(row+rowOffset); slAddHead(&bedList, bed); } sqlFreeResult(&sr); } slReverse(&bedList); uglyf("Loaded beds\n"); sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row + rowOffset); slAddHead(&pslList, psl); binKeeperAdd(bk, psl->tStart, psl->tEnd, psl); } sqlFreeResult(&sr); uglyf("Loaded psls\n"); chromSeq = hLoadChrom(database, chrom); /* Fetch entire chromosome into memory. */ uglyf("Loaded human seq\n"); for (bed = bedList; bed != NULL; bed = bed->next) { struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd); for (el = list; el != NULL; el = el->next) { struct psl *fullPsl = el->val; struct psl *psl = pslTrimToTargetRange(fullPsl, bed->chromStart, bed->chromEnd); if (psl != NULL) { foldPslIntoStats(psl, chromSeq, otherHash, stats); pslFree(&psl); } } slFreeList(&list); stats->bedCount += 1; stats->bedBaseCount += bed->chromEnd - bed->chromStart; sqlFreeResult(&sr); } freeDnaSeq(&chromSeq); pslFreeList(&pslList); binKeeperFree(&bk); hFreeConn(&conn); }
void oneChromInput(char *database, char *chrom, int chromSize, char *rangeTrack, char *expTrack, struct hash *refLinkHash, struct hash *erHash, FILE *f) /* Read in info for one chromosome. */ { struct binKeeper *rangeBk = binKeeperNew(0, chromSize); struct binKeeper *expBk = binKeeperNew(0, chromSize); struct binKeeper *knownBk = binKeeperNew(0, chromSize); struct bed *rangeList = NULL, *range; struct bed *expList = NULL; struct genePred *knownList = NULL; struct rangeInfo *riList = NULL, *ri; struct hash *riHash = hashNew(0); /* rangeInfo values. */ struct binElement *rangeBeList = NULL, *rangeBe, *beList = NULL, *be; /* Load up data from database. */ rangeList = loadBed(database, chrom, rangeTrack, 12, rangeBk); expList = loadBed(database, chrom, expTrack, 15, expBk); knownList = loadGenePred(database, chrom, "refGene", knownBk); /* Build range info basics. */ rangeBeList = binKeeperFindAll(rangeBk); for (rangeBe = rangeBeList; rangeBe != NULL; rangeBe = rangeBe->next) { range = rangeBe->val; AllocVar(ri); slAddHead(&riList, ri); hashAddSaveName(riHash, range->name, ri, &ri->id); ri->range = range; ri->commonName = findCommonName(range, knownBk, refLinkHash); } slReverse(&riList); /* Mark split ones. */ beList = binKeeperFindAll(expBk); for (be = beList; be != NULL; be = be->next) { struct bed *exp = be->val; struct binElement *subList = binKeeperFind(rangeBk, exp->chromStart, exp->chromEnd); if (slCount(subList) > 1) { struct binElement *sub; for (sub = subList; sub != NULL; sub = sub->next) { struct bed *range = sub->val; struct rangeInfo *ri = hashMustFindVal(riHash, range->name); ri->isSplit = TRUE; } } slFreeList(&subList); } /* Output the nice ones: not split and having some expression info. */ for (ri = riList; ri != NULL; ri = ri->next) { if (!ri->isSplit) { struct bed *range = ri->range; beList = binKeeperFind(expBk, range->chromStart, range->chromEnd); if (beList != NULL) outputAveraged(f, ri, erHash, beList); slFreeList(&beList); } } /* Clean up time! 
*/ freeHash(&riHash); genePredFreeList(&knownList); bedFree(&rangeList); bedFree(&expList); slFreeList(&rangeBeList); slFreeList(&beList); slFreeList(&riList); binKeeperFree(&rangeBk); binKeeperFree(&expBk); binKeeperFree(&knownBk); }