void hgLoadNet(char *database, char *track, int netCount, char *netFiles[]) /* hgLoadNet - Load a net file into database. */ { int i; struct lineFile *lf ; struct chainNet *net; char alignFileName[] ="align.tab"; FILE *alignFile = mustOpen(alignFileName,"w"); for (i=0; i<netCount; ++i) { lf = lineFileOpen(netFiles[i], TRUE); while ((net = chainNetRead(lf)) != NULL) { verbose(1, "read %s\n",net->name); cnWriteTables(net->name,net->fillList, alignFile, 1); chainNetFree(&net); } lineFileClose(&lf); } fclose(alignFile); if (!test) { loadDatabase(database, alignFileName, track); remove(alignFileName); } }
void netToAxt(char *netName, char *chainName, char *tNibDir, char *qNibDir, char *axtName) /* netToAxt - Convert net (and chain) to axt.. */ { Bits *usedBits = findUsedIds(netName); struct hash *chainHash; struct chainNet *net; struct lineFile *lf = lineFileOpen(netName, TRUE); FILE *f = mustOpen(axtName, "w"); struct dnaSeq *tChrom = NULL; struct nibTwoCache *qNtc = nibTwoCacheNew(qNibDir); char *gapFileName = optionVal("gapOut", NULL); FILE *gapFile = NULL; if (gapFileName) gapFile = mustOpen(gapFileName, "w"); lineFileSetMetaDataOutput(lf, f); chainHash = chainReadUsedSwap(chainName, qChain, usedBits); bitFree(&usedBits); while ((net = chainNetRead(lf)) != NULL) { verbose(1, "Processing %s\n", net->name); tChrom = nibTwoLoadOne(tNibDir, net->name); if (tChrom->size != net->size) errAbort("Size mismatch on %s. Net/nib out of sync or possibly nib dirs swapped?", tChrom->name); rConvert(net->fillList, tChrom, qNtc, qNibDir, chainHash, f, gapFile); freeDnaSeq(&tChrom); chainNetFree(&net); } nibTwoCacheFree(&qNtc); }
void liftNet(char *destFile, struct hash *liftHash, int sourceCount, char *sources[],
	boolean querySide)
/* Lift up coordinates in .net file.
 *   destFile    - output net file; all lifted nets from all sources go here
 *   liftHash    - lift specifications, consulted via findLift()/liftFillsQ()
 *   sources     - sourceCount input net files
 *   querySide   - TRUE to lift the query side of each fill, FALSE to lift
 *                 the target side (net name, size and fills)
 * On the target side a net with no lift spec is dropped unless the
 * file-level 'how' setting equals carryMissing, in which case it is
 * written through unchanged. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;
int dotMod = dots;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    char *source = sources[sourceIx];
    struct lineFile *lf = lineFileOpen(source, TRUE);
    struct chainNet *net;

    lineFileSetMetaDataOutput(lf, f);
    verbose(1, "Lifting %s\n", source);
    while ((net = chainNetRead(lf)) != NULL)
	{
	if (querySide)
	    {
	    /* Query names are re-registered in a fresh hash, which then
	     * replaces the one the net was read with. */
	    struct hash *newNameHash = hashNew(6);
	    liftFillsQ(net->fillList, newNameHash, liftHash, lf);
	    hashFree(&(net->nameHash));
	    net->nameHash = newNameHash;
	    }
	else
	    {
	    struct liftSpec *spec = findLift(liftHash, net->name, lf);
	    if (spec == NULL)
		{
		if (how != carryMissing)
		    {
		    chainNetFree(&net);
		    continue;
		    }
		}
	    else
		{
		freeMem(net->name);
		net->name = cloneString(spec->newName);
		net->size = spec->newSize;
		liftFillsT(net->fillList, spec);
		}
	    }
	chainNetWrite(net, f);
	chainNetFree(&net);
	doDots(&dotMod);
	}
    lineFileClose(&lf);
    if (dots)
	verbose(1, "\n");
    }

/* Flush and close the destination file; it was previously left open. */
fclose(f);
}
struct rbTree *rbTreeFromNetFile(char *fileName)
/* Build an rbTree from a net file.
 * The top-level fills of the first net in fileName are added to a tree
 * ordered by cnFillRangeCmp.  If the file holds no net at all an empty
 * tree is returned.
 * The net itself is intentionally not freed: the tree stores pointers to
 * its fills, so they must stay valid for as long as the tree is used. */
{
struct rbTree *rbTree = rbTreeNew(cnFillRangeCmp);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chainNet *cn = chainNetRead(lf);
struct cnFill *fill = NULL;

/* chainNetRead() yields NULL when there is nothing to read; guard
 * against dereferencing it. */
if (cn != NULL)
    {
    for (fill = cn->fillList; fill != NULL; fill = fill->next)
	rbTreeAdd(rbTree, fill);
    }
lineFileClose(&lf);
return rbTree;
}
void netStats(char *summaryFile, int inCount, char *inFiles[]) /* netStats - Gather statistics on net. */ { int i; int netCount = 0; FILE *gapFile = optionalFile("gap"); FILE *fillFile = optionalFile("fill"); FILE *topFile = optionalFile("top"); FILE *nonSynFile = optionalFile("nonSyn"); FILE *invFile = optionalFile("inv"); FILE *synFile = optionalFile("syn"); FILE *dupeFile = optionalFile("dupe"); intLm = lmInit(0); logFile = mustOpen(summaryFile, "w"); logIt("net files: %d\n", inCount); for (i=0; i<inCount; ++i) { struct lineFile *lf = lineFileOpen(inFiles[i], TRUE); struct chainNet *net; while ((net = chainNetRead(lf)) != NULL) { printf("%s\n", net->name); ++netCount; traverseNet(net, NULL, depthGather); traverseNet(net, gapFile, gapGather); traverseNet(net, fillFile, fillGather); traverseNet(net, topFile, topGather); traverseNet(net, nonSynFile, nonSynGather); traverseNet(net, synFile, synGather); traverseNet(net, invFile, invGather); traverseNet(net, dupeFile, dupeGather); chainNetFree(&net); } lineFileClose(&lf); } logIt("net chromosomes: %d\n", netCount); logIt("max depth: %d\n", depthMax); logIt("gap count: %d\n", gapCount); logIt("gap average size T: %4.1f\n", gapSizeT/(double)gapCount); logIt("gap average size Q: %4.1f\n", gapSizeQ/(double)gapCount); logFillStats("fill", &fillStats); logFillStats("top", &topStats); logFillStats("nonSyn", &nonSynStats); logFillStats("syn", &synStats); logFillStats("inv", &invStats); logFillStats("dupe", &dupeStats); }
Bits *findUsedIds(char *netFileName)
/* Return a bit array of maxChainId bits in which the chain ids used by
 * the nets of netFileName have been marked by chainNetMarkUsed().
 * The returned array belongs to the caller. */
{
struct lineFile *netLf = lineFileOpen(netFileName, TRUE);
Bits *usedIds = bitAlloc(maxChainId);
struct chainNet *curNet;

for (;;)
    {
    curNet = chainNetRead(netLf);
    if (curNet == NULL)
	break;
    chainNetMarkUsed(curNet, usedIds, maxChainId);
    chainNetFree(&curNet);
    }
lineFileClose(&netLf);
return usedIds;
}
void netToBedDetailed(char *inName, char *outName, int maxGap, int minFill)
/* netToBedDetailed - Convert target coverage of net to a bed
 * breaking up things at big gaps and excluding small
 * fills.
 *   inName  - input net file
 *   outName - output bed file
 *   maxGap, minFill - thresholds passed unchanged to rNetToBed() */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
struct chainNet *net;

while ((net = chainNetRead(lf)) != NULL)
    {
    rNetToBed(net, net->fillList, maxGap, minFill, f);
    chainNetFree(&net);
    }

/* Release the input and flush/close the output; both were previously
 * left open. */
lineFileClose(&lf);
fclose(f);
}
struct hash *netToBkHash(char *netFile) /* Read net file into a hash full of binKeepers keyed by chromosome. * The binKeepers are full of nets. */ { struct hash *netHash = hashNew(0); struct lineFile *lf = lineFileOpen(netFile, TRUE); struct chainNet *net, *netList = chainNetRead(lf); for (net = netList; net != NULL; net = net->next) { if (hashLookup(netHash, net->name)) errAbort("%s has multiple %s records", netFile, net->name); struct binKeeper *bk = binKeeperNew(0, net->size); hashAdd(netHash, net->name, bk); struct cnFill *fill; for(fill=net->fillList; fill != NULL; fill = fill->next) binKeeperAdd(bk, fill->tStart, fill->tStart+fill->tSize, fill); } lineFileClose(&lf); return netHash; }
void netChainSubset(char *netIn, char *chainIn, char *chainOut) /* netChainSubset - Create chain file with subset of * * chains that appear in the net. */ { struct hash *chainHash; struct chainNet *net; struct lineFile *lf = lineFileOpen(netIn, TRUE); FILE *f = mustOpen(chainOut, "w"); char *gapFileName = optionVal("gapOut", NULL); FILE *gapFile = NULL; if (gapFileName) gapFile = mustOpen(gapFileName, "w"); chainHash = chainReadAllWithMeta(chainIn, f); while ((net = chainNetRead(lf)) != NULL) { verbose(1, "Processing %s\n", net->name); rConvert(net->fillList, chainHash, f, gapFile); chainNetFree(&net); } }
void netContigs(char *netFile, char *liftFile) /* netContigs - get query-side contigs from a chrom-level net file */ { struct lineFile *lf = lineFileOpen(netFile, TRUE); struct chainNet *net; struct liftSpec *lift, *prevLift, *lifts; char *chrom; /* read lift file and split into a hash of per-chrom lists */ lifts = readLifts(liftFile); prevLift = NULL; for (lift = lifts; lift != NULL; lift = lift->next) { //uglyf("reading lift: %s\n", lift->oldName); if (hashLookup(chromHash, lift->newName) == NULL) { /* new chrom */ //uglyf("adding chrom: %s\n", lift->newName); hashAdd(chromHash, lift->newName, lift); /* terminate previous list */ /* NOTE: expects input sorted by chrom */ if (prevLift != NULL) prevLift->next = NULL; } prevLift = lift; } /* read in nets and convert query side to contig coords */ //uglyf("reading in nets\n"); while ((net = chainNetRead(lf)) != NULL) { rLower(net->fillList); chainNetFree(&net); } /* print accumulated contigs */ hashTraverseEls(contigNames, printContig); }
void netClass(char *inName, char *tDb, char *qDb, char *outName) /* netClass - Add classification info to net. */ { struct chainNet *net; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); struct chrom *qChromList, *chrom; struct hash *qChromHash; struct hash *arHash = NULL; struct sqlConnection *tConn = sqlConnect(tDb); struct sqlConnection *qConn = sqlConnect(qDb); qLm = lmInit(0); if (!noAr) arHash = getAncientRepeats(tConn, qConn); getChroms(qConn, &qChromHash, &qChromList); verbose(1, "Reading gaps in %s\n", qDb); if (sqlTableExists(qConn, "gap")) { getSeqGapsUnsplit(qConn, qChromHash); } else { for (chrom = qChromList; chrom != NULL; chrom = chrom->next) chrom->nGaps = getSeqGaps(qConn, chrom->name); } if (qNewR) { verbose(1, "Reading new repeats from %s\n", qNewR); for (chrom = qChromList; chrom != NULL; chrom = chrom->next) chrom->newRepeats = getNewRepeats(qNewR, chrom->name); } verbose(1, "Reading simpleRepeats in %s\n", qDb); getTrfUnsplit(qConn, qChromHash); if (qRepeatTable) { verbose(1, "Reading repeats in %s from table %s\n", qDb, qRepeatTable); getRepeatsUnsplitTable(qConn, qChromHash, qRepeatTable); } else { verbose(1, "Reading repeats in %s\n", qDb); if (sqlTableExists(qConn, "rmsk")) getRepeatsUnsplit(qConn, qChromHash, arHash); else { for (chrom = qChromList; chrom != NULL; chrom = chrom->next) getRepeats(qConn, arHash, chrom->name, &chrom->repeats, &chrom->oldRepeats); } } while ((net = chainNetRead(lf)) != NULL) { struct rbTree *tN, *tRepeats, *tOldRepeats, *tTrf; char *tName = net->name; if (liftHashT != NULL) { struct liftSpec *lft = hashMustFindVal(liftHashT, net->name); tName = lft->newName; } verbose(1, "Processing %s.%s\n", tDb, net->name); tN = getSeqGaps(tConn, tName); tAddN(net, net->fillList, tN); rbTreeFree(&tN); qAddN(net, net->fillList, qChromHash); if (tRepeatTable) getRepeatsTable(tConn, tRepeatTable, tName, &tRepeats, &tOldRepeats); else getRepeats(tConn, arHash, tName, &tRepeats, &tOldRepeats); 
tAddR(net, net->fillList, tRepeats); if (!noAr) tAddOldR(net, net->fillList, tOldRepeats); rbTreeFree(&tRepeats); rbTreeFree(&tOldRepeats); qAddR(net, net->fillList, qChromHash); if (!noAr) qAddOldR(net, net->fillList, qChromHash); tTrf = getTrf(tConn, tName); tAddTrf(net, net->fillList, tTrf); rbTreeFree(&tTrf); qAddTrf(net, net->fillList, qChromHash); if (tNewR) { struct rbTree *tree = getNewRepeats(tNewR, tName); tAddNewR(net, net->fillList, tree); rbTreeFree(&tree); } if (qNewR) qAddNewR(net, net->fillList, qChromHash); chainNetWrite(net, f); chainNetFree(&net); } sqlDisconnect(&tConn); sqlDisconnect(&qConn); }
void netFilter(int inCount, char *inFiles[]) /* netFilter - Filter out parts of net.. */ { FILE *f = stdout; int i; boolean doLine = optionExists("line"); tHash = hashCommaOption("t"); notTHash = hashCommaOption("notT"); qHash = hashCommaOption("q"); notQHash = hashCommaOption("notQ"); minScore = optionInt("minScore", -BIGNUM); maxScore = optionFloat("maxScore", 9e99); qStartMin = optionInt("qStartMin", -BIGNUM); qStartMax = optionInt("qStartMax", BIGNUM); qEndMin = optionInt("qEndMin", -BIGNUM); qEndMax = optionInt("qEndMax", BIGNUM); tStartMin = optionInt("tStartMin", -BIGNUM); tStartMax = optionInt("tStartMax", BIGNUM); tEndMin = optionInt("tEndMin", -BIGNUM); tEndMax = optionInt("tEndMax", BIGNUM); qOverlapStart = optionInt("qOverlapStart", -BIGNUM); qOverlapEnd = optionInt("qOverlapEnd", BIGNUM); tOverlapStart = optionInt("tOverlapStart", -BIGNUM); tOverlapEnd = optionInt("tOverlapEnd", BIGNUM); doSyn = optionExists("syn"); minTopScore = optionFloat("minTopScore", minTopScore); minSynScore = optionFloat("minSynScore", minSynScore); minSynSize = optionFloat("minSynSize", minSynSize); minSynAli = optionFloat("minSynAli", minSynAli); maxFar = optionFloat("maxFar", maxFar); doChimpSyn = optionExists("chimpSyn"); doNonSyn = optionExists("nonsyn"); minGap = optionInt("minGap", minGap); minAli = optionInt("minAli", minAli); minSizeT = optionInt("minSizeT", minSizeT); minSizeQ = optionInt("minSizeQ", minSizeQ); fillOnly = optionExists("fill"); gapOnly = optionExists("gap"); types = optionMultiVal("type", types); noRandom = optionExists("noRandom"); noHap = optionExists("noHap"); for (i=0; i<inCount; ++i) { struct lineFile *lf = lineFileOpen(inFiles[i], TRUE); if (doLine) { netLineFilter(lf, f); } else { struct chainNet *net; while ((net = chainNetRead(lf)) != NULL) { boolean writeIt = TRUE; if (tHash != NULL && !hashLookup(tHash, net->name)) writeIt = FALSE; if (notTHash != NULL && hashLookup(notTHash, net->name)) writeIt = FALSE; if (noRandom && (endsWith(net->name, 
"_random") || startsWith("chrUn", net->name) || sameWord("chrNA", net->name) /* danRer */ || sameWord("chrU", net->name))) /* dm */ writeIt = FALSE; if (noHap && stringIn("_hap",net->name)) writeIt = FALSE; if (writeIt) { writeFiltered(net, f); } chainNetFree(&net); } } lineFileClose(&lf); } }