void hgLoadNet(char *database, char *track, int netCount, char *netFiles[])
/* hgLoadNet - Load net files into database.
 * Every net read from the netCount files in netFiles is passed to
 * cnWriteTables, which writes to the tab file "align.tab".  Unless the
 * global 'test' flag is set, that tab file is then handed to
 * loadDatabase for 'track' in 'database' and removed afterwards. */
{
    char alignFileName[] = "align.tab";
    FILE *alignFile = mustOpen(alignFileName, "w");
    int fileIx = 0;

    while (fileIx < netCount)
    {
        struct lineFile *lf = lineFileOpen(netFiles[fileIx], TRUE);
        struct chainNet *net;

        /* chainNetFree() clears 'net'; the loop step fetches the next one. */
        for (net = chainNetRead(lf); net != NULL; net = chainNetRead(lf))
        {
            verbose(1, "read %s\n",net->name);
            cnWriteTables(net->name, net->fillList, alignFile, 1);
            chainNetFree(&net);
        }
        lineFileClose(&lf);
        ++fileIx;
    }
    fclose(alignFile);

    /* In test mode the tab file is left on disk and nothing is loaded. */
    if (test)
        return;

    loadDatabase(database, alignFileName, track);
    remove(alignFileName);
}
void netToAxt(char *netName, char *chainName, char *tNibDir, char *qNibDir, char *axtName)
/* netToAxt - Convert net (and chain) to axt.
 *   netName   - net file to read; also scanned first by findUsedIds
 *   chainName - chain file passed to chainReadUsedSwap together with the
 *               used-id bit array
 *   tNibDir   - where the target sequence for each net is loaded from
 *   qNibDir   - query sequence location, accessed through a nibTwoCache
 *   axtName   - output file, created/truncated
 * If the "gapOut" option is set, a second output file of that name is
 * opened and handed to rConvert as well.
 * Aborts when the loaded target sequence size differs from the net size. */
{
Bits *usedBits = findUsedIds(netName);
struct hash *chainHash;
struct chainNet *net;
struct lineFile *lf = lineFileOpen(netName, TRUE);
FILE *f = mustOpen(axtName, "w");
struct dnaSeq *tChrom = NULL;
struct nibTwoCache *qNtc = nibTwoCacheNew(qNibDir);
char *gapFileName = optionVal("gapOut", NULL);
FILE *gapFile = NULL;

if (gapFileName)
    gapFile = mustOpen(gapFileName, "w");
lineFileSetMetaDataOutput(lf, f);
chainHash = chainReadUsedSwap(chainName, qChain, usedBits);
/* The bit array is only needed while selecting chains. */
bitFree(&usedBits);
while ((net = chainNetRead(lf)) != NULL)
    {
    verbose(1, "Processing %s\n", net->name);
    tChrom = nibTwoLoadOne(tNibDir, net->name);
    if (tChrom->size != net->size)
        errAbort("Size mismatch on %s.  Net/nib out of sync or possibly nib dirs swapped?", 
                tChrom->name);
    rConvert(net->fillList, tChrom, qNtc, qNibDir, chainHash, f, gapFile);
    freeDnaSeq(&tChrom);
    chainNetFree(&net);
    }
nibTwoCacheFree(&qNtc);

/* Release the input first (it was told to echo metadata to f), then the
 * outputs, so buffered data is written out and no handles leak. */
lineFileClose(&lf);
if (gapFile != NULL)
    carefulClose(&gapFile);
carefulClose(&f);
}
void liftNet(char *destFile, struct hash *liftHash, 
        int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .net file.
 *   destFile    - output net file, created/truncated
 *   liftHash    - lift specifications, consulted by findLift / liftFillsQ
 *   sourceCount - number of entries in sources
 *   sources     - input net files
 *   querySide   - TRUE: lift the query side of every fill (liftFillsQ) and
 *                 replace the net's nameHash;
 *                 FALSE: lift the target side using the spec found for
 *                 the net's name.
 * On the target side a net without a lift spec is dropped unless the
 * global 'how' equals carryMissing, in which case it is written as is. */
{
    FILE *f = mustOpen(destFile, "w");
    int sourceIx;
    int dotMod = dots;

    for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
        {
        char *source = sources[sourceIx];
        struct lineFile *lf = lineFileOpen(source, TRUE);
        struct chainNet *net;
        lineFileSetMetaDataOutput(lf, f);
        verbose(1, "Lifting %s\n", source);
        while ((net = chainNetRead(lf)) != NULL)
            {
            if (querySide)
                {
                /* liftFillsQ is given a fresh hash, which then replaces
                 * the one the net was read with. */
                struct hash *newNameHash = hashNew(6);
                liftFillsQ(net->fillList, newNameHash, liftHash, lf);
                hashFree(&(net->nameHash));
                net->nameHash = newNameHash;
                }
            else
                {
                struct liftSpec *spec = findLift(liftHash, net->name, lf);
                if (spec == NULL)
                    {
                    if (how != carryMissing)
                        {
                        /* No spec and not carrying: skip this net. */
                        chainNetFree(&net);
                        continue;
                        }
                    }
                else
                    {
                    freeMem(net->name);
                    net->name = cloneString(spec->newName);
                    net->size = spec->newSize;
                    liftFillsT(net->fillList, spec);
                    }
                }
            chainNetWrite(net, f);
            chainNetFree(&net);
            doDots(&dotMod);
            }
        lineFileClose(&lf);
        if (dots)
            verbose(1, "\n");
        }
    /* Close the destination so its buffer is flushed and the handle is
     * not leaked. */
    carefulClose(&f);
}
Exemple #4
0
struct rbTree *rbTreeFromNetFile(char *fileName)
/* Build an rbTree (ordered by cnFillRangeCmp) from the fills on the
 * fillList of the first net in fileName.  Only the 'next' links of that
 * list are followed.  Returns an empty tree when the file holds no net.
 * The net is intentionally not freed: the tree stores pointers to its
 * fills. */
{
struct rbTree *rbTree = rbTreeNew(cnFillRangeCmp);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chainNet *cn = chainNetRead(lf);
struct cnFill *fill = NULL;

/* chainNetRead returns NULL when there is nothing (more) to read, so an
 * empty file must not be dereferenced. */
if (cn != NULL)
    {
    for (fill = cn->fillList; fill != NULL; fill = fill->next)
        rbTreeAdd(rbTree, fill);
    }
lineFileClose(&lf);
return rbTree;
}
void netStats(char *summaryFile, int inCount, char *inFiles[])
/* netStats - Gather statistics on net.
 *   summaryFile - file the summary is logged to (opened into the global
 *                 logFile and written through logIt / logFillStats)
 *   inCount     - number of entries in inFiles
 *   inFiles     - net files to read
 * Each net is traversed once per gatherer; the per-category output files
 * come from optionalFile() for the options gap, fill, top, nonSyn, inv,
 * syn and dupe.  The name of every net is also printed to stdout. */
{
int i;
int netCount = 0;
FILE *gapFile = optionalFile("gap");
FILE *fillFile = optionalFile("fill");
FILE *topFile = optionalFile("top");
FILE *nonSynFile = optionalFile("nonSyn");
FILE *invFile = optionalFile("inv");
FILE *synFile = optionalFile("syn");
FILE *dupeFile = optionalFile("dupe");

intLm = lmInit(0);
logFile = mustOpen(summaryFile, "w");
logIt("net files: %d\n", inCount);
for (i=0; i<inCount; ++i)
    {
    struct lineFile *lf = lineFileOpen(inFiles[i], TRUE);
    struct chainNet *net;
    while ((net = chainNetRead(lf)) != NULL)
        {
        printf("%s\n", net->name);
        ++netCount;
        traverseNet(net, NULL, depthGather);
        traverseNet(net, gapFile, gapGather);
        traverseNet(net, fillFile, fillGather);
        traverseNet(net, topFile, topGather);
        traverseNet(net, nonSynFile, nonSynGather);
        traverseNet(net, synFile, synGather);
        traverseNet(net, invFile, invGather);
        traverseNet(net, dupeFile, dupeGather);
        chainNetFree(&net);
        }
    lineFileClose(&lf);
    }

logIt("net chromosomes: %d\n", netCount);
logIt("max depth: %d\n", depthMax);
logIt("gap count: %d\n",  gapCount);
/* With no gaps counted the averages would divide by zero; report 0.0
 * instead of nan/inf in that case. */
logIt("gap average size T: %4.1f\n",
      (gapCount != 0) ? gapSizeT/(double)gapCount : 0.0);
logIt("gap average size Q: %4.1f\n",
      (gapCount != 0) ? gapSizeQ/(double)gapCount : 0.0);
logFillStats("fill", &fillStats);
logFillStats("top", &topStats);
logFillStats("nonSyn", &nonSynStats);
logFillStats("syn", &synStats);
logFillStats("inv", &invStats);
logFillStats("dupe", &dupeStats);
}
Bits *findUsedIds(char *netFileName)
/* Scan every net in netFileName and return a freshly allocated bit array
 * of maxChainId bits in which chainNetMarkUsed has flagged the chain ids
 * the nets refer to. */
{
Bits *usedIds = bitAlloc(maxChainId);
struct lineFile *netLf = lineFileOpen(netFileName, TRUE);
struct chainNet *curNet;

/* chainNetFree() resets curNet; the loop step reads the next net. */
for (curNet = chainNetRead(netLf); curNet != NULL; curNet = chainNetRead(netLf))
    {
    chainNetMarkUsed(curNet, usedIds, maxChainId);
    chainNetFree(&curNet);
    }
lineFileClose(&netLf);
return usedIds;
}
void netToBedDetailed(char *inName, char *outName, 
        int maxGap, int minFill)
/* netToBedDetailed - Convert target coverage of net to a bed 
 * breaking up things at big gaps and excluding small
 * fills.
 *   inName  - net file to read
 *   outName - bed file to write, created/truncated
 *   maxGap, minFill - thresholds forwarded unchanged to rNetToBed */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
struct chainNet *net;

while ((net = chainNetRead(lf)) != NULL)
    {
    rNetToBed(net, net->fillList, maxGap, minFill, f);
    chainNetFree(&net);
    }
/* Release input and output so the bed file is flushed and no handles
 * are leaked. */
lineFileClose(&lf);
carefulClose(&f);
}
Exemple #8
0
struct hash *netToBkHash(char *netFile)
/* Read net file into a hash full of binKeepers keyed by chromosome.
 * The binKeepers are full of nets: each fill on a net's fillList is
 * added under the range [tStart, tStart+tSize).
 * Aborts if the same net name occurs more than once in netFile.
 * The nets are intentionally not freed, since the binKeepers keep
 * pointers to their fills. */
{
struct hash *netHash = hashNew(0);
struct lineFile *lf = lineFileOpen(netFile, TRUE);
struct chainNet *net, *netList;

/* chainNetRead hands back one net per call, so keep reading until it
 * returns NULL; a single call would index only the first chromosome.
 * The inner loop still follows ->next in case a list is returned. */
while ((netList = chainNetRead(lf)) != NULL)
    {
    for (net = netList; net != NULL; net = net->next)
        {
        struct binKeeper *bk;
        struct cnFill *fill;
        if (hashLookup(netHash, net->name))
            errAbort("%s has multiple %s records", netFile, net->name);
        bk = binKeeperNew(0, net->size);
        hashAdd(netHash, net->name, bk);
        for (fill = net->fillList; fill != NULL; fill = fill->next)
            binKeeperAdd(bk, fill->tStart, fill->tStart+fill->tSize, fill);
        }
    }
lineFileClose(&lf);
return netHash;
}
void netChainSubset(char *netIn, char *chainIn, char *chainOut)
/* netChainSubset - Create chain file with subset of
 * chains that appear in the net.
 *   netIn    - net file to read
 *   chainIn  - chain file, loaded with chainReadAllWithMeta (which is
 *              also given the output file)
 *   chainOut - output chain file, created/truncated
 * If the "gapOut" option is set, a second output file of that name is
 * opened and passed to rConvert. */
{
struct hash *chainHash;
struct chainNet *net;
struct lineFile *lf = lineFileOpen(netIn, TRUE);
FILE *f = mustOpen(chainOut, "w");
char *gapFileName = optionVal("gapOut", NULL);
FILE *gapFile = NULL;

if (gapFileName)
    gapFile = mustOpen(gapFileName, "w");
chainHash = chainReadAllWithMeta(chainIn, f);
while ((net = chainNetRead(lf)) != NULL)
    {
    verbose(1, "Processing %s\n", net->name);
    rConvert(net->fillList, chainHash, f, gapFile);
    chainNetFree(&net);
    }
/* Close everything opened above so output is flushed and no handles
 * are leaked. */
lineFileClose(&lf);
if (gapFile != NULL)
    carefulClose(&gapFile);
carefulClose(&f);
}
Exemple #10
0
void netContigs(char *netFile, char *liftFile)
/* netContigs - get query-side contigs from a chrom-level net file.
 * Reads the lift specs from liftFile and records, in the global
 * chromHash, the first spec seen for each newName; then runs rLower over
 * the fill list of every net in netFile and finally calls printContig
 * for each element of the global contigNames hash. */
{
struct lineFile *lf = lineFileOpen(netFile, TRUE);
struct chainNet *net;
struct liftSpec *lift, *prevLift, *lifts;

/* read lift file and split into a hash of per-chrom lists */
lifts = readLifts(liftFile);
prevLift = NULL;
for (lift = lifts; lift != NULL; lift = lift->next)
    {
    if (hashLookup(chromHash, lift->newName) == NULL)
        {
        /* new chrom */
        hashAdd(chromHash, lift->newName, lift);
        /* terminate previous list */
        /* NOTE: expects input sorted by chrom */
        if (prevLift != NULL)
            prevLift->next = NULL;
        }
    prevLift = lift;
    }

/* read in nets and convert query side to contig coords */
while ((net = chainNetRead(lf)) != NULL)
    {
    rLower(net->fillList);
    chainNetFree(&net);
    }
/* done with the input; release it before reporting */
lineFileClose(&lf);

/* print accumulated contigs */
hashTraverseEls(contigNames, printContig);
}
Exemple #11
0
void netClass(char *inName, char *tDb, char *qDb, char *outName)
/* netClass - Add classification info to net.
 *   inName  - net file to read
 *   tDb     - target database to connect to
 *   qDb     - query database to connect to
 *   outName - annotated net file to write, created/truncated
 * Query-side information (gaps, simple repeats, repeats and optionally
 * new repeats) is gathered up front for all chromosomes; target-side
 * information is fetched per net inside the main loop.  Behaviour is
 * further controlled by the globals noAr, qNewR, tNewR, qRepeatTable,
 * tRepeatTable and liftHashT. */
{
struct chainNet *net;
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
struct chrom *qChromList, *chrom;
struct hash *qChromHash;
struct hash *arHash = NULL;
struct sqlConnection *tConn = sqlConnect(tDb);
struct sqlConnection *qConn = sqlConnect(qDb);

qLm = lmInit(0);

/* arHash stays NULL when ancient repeats are disabled. */
if (!noAr)
    arHash = getAncientRepeats(tConn, qConn);

getChroms(qConn, &qChromHash, &qChromList);

verbose(1, "Reading gaps in %s\n", qDb);
if (sqlTableExists(qConn, "gap"))
    {
    getSeqGapsUnsplit(qConn, qChromHash);
    }
else
    {
    /* No single "gap" table: fetch gaps chromosome by chromosome. */
    for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
        chrom->nGaps = getSeqGaps(qConn, chrom->name);
    }

if (qNewR)
    {
    verbose(1, "Reading new repeats from %s\n", qNewR);
    for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
        chrom->newRepeats = getNewRepeats(qNewR, chrom->name);
    }

verbose(1, "Reading simpleRepeats in %s\n", qDb);
getTrfUnsplit(qConn, qChromHash);

if (qRepeatTable)
    {
    verbose(1, "Reading repeats in %s from table %s\n", qDb, qRepeatTable);
    getRepeatsUnsplitTable(qConn, qChromHash, qRepeatTable);
    }
else
    {
    verbose(1, "Reading repeats in %s\n", qDb);
    if (sqlTableExists(qConn, "rmsk"))
        getRepeatsUnsplit(qConn, qChromHash, arHash);
    else
        {
        /* No single "rmsk" table: fetch repeats per chromosome. */
        for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
            getRepeats(qConn, arHash, chrom->name, &chrom->repeats,
                       &chrom->oldRepeats);
        }
    }

while ((net = chainNetRead(lf)) != NULL)
    {
    struct rbTree *tN, *tRepeats, *tOldRepeats, *tTrf;
    char *tName = net->name;
    /* With a target lift hash, database lookups use the lifted name. */
    if (liftHashT != NULL)
        {
        struct liftSpec *lft = hashMustFindVal(liftHashT, net->name);
        tName = lft->newName;
        }

    verbose(1, "Processing %s.%s\n", tDb, net->name);
    tN = getSeqGaps(tConn, tName);
    tAddN(net, net->fillList, tN);
    rbTreeFree(&tN);
    qAddN(net, net->fillList, qChromHash);

    /* tRepeats / tOldRepeats are filled in by whichever getter runs. */
    if (tRepeatTable)
        getRepeatsTable(tConn, tRepeatTable, tName, &tRepeats, &tOldRepeats);
    else
        getRepeats(tConn, arHash, tName, &tRepeats, &tOldRepeats);
    tAddR(net, net->fillList, tRepeats);
    if (!noAr)
        tAddOldR(net, net->fillList, tOldRepeats);
    rbTreeFree(&tRepeats);
    rbTreeFree(&tOldRepeats);
    qAddR(net, net->fillList, qChromHash);
    if (!noAr)
        qAddOldR(net, net->fillList, qChromHash);

    tTrf = getTrf(tConn, tName);
    tAddTrf(net, net->fillList, tTrf);
    rbTreeFree(&tTrf);
    qAddTrf(net, net->fillList, qChromHash);

    if (tNewR)
        {
        struct rbTree *tree = getNewRepeats(tNewR, tName);
        tAddNewR(net, net->fillList, tree);
        rbTreeFree(&tree);
        }
    if (qNewR)
        qAddNewR(net, net->fillList, qChromHash);
    chainNetWrite(net, f);
    chainNetFree(&net);
    }

/* Release the net input and flush/close the output before dropping the
 * database connections, so no handles are leaked. */
lineFileClose(&lf);
carefulClose(&f);
sqlDisconnect(&tConn);
sqlDisconnect(&qConn);
}
void netFilter(int inCount, char *inFiles[])
/* netFilter - Filter out parts of net.
 * Loads all filter settings from the command line options into the
 * file-level variables, then processes each of the inCount files in
 * inFiles, writing to stdout.  With the "line" option each file goes
 * through netLineFilter; otherwise every net whose name passes the
 * t/notT/noRandom/noHap checks is handed to writeFiltered. */
{
FILE *out = stdout;
boolean lineMode = optionExists("line");
int fileIx;

/* Name based selections. */
tHash = hashCommaOption("t");
notTHash = hashCommaOption("notT");
qHash = hashCommaOption("q");
notQHash = hashCommaOption("notQ");

/* Score and coordinate limits. */
minScore = optionInt("minScore", -BIGNUM);
maxScore = optionFloat("maxScore", 9e99);
qStartMin = optionInt("qStartMin", -BIGNUM);
qStartMax = optionInt("qStartMax", BIGNUM);
qEndMin = optionInt("qEndMin", -BIGNUM);
qEndMax = optionInt("qEndMax", BIGNUM);
tStartMin = optionInt("tStartMin", -BIGNUM);
tStartMax = optionInt("tStartMax", BIGNUM);
tEndMin = optionInt("tEndMin", -BIGNUM);
tEndMax = optionInt("tEndMax", BIGNUM);
qOverlapStart = optionInt("qOverlapStart", -BIGNUM);
qOverlapEnd = optionInt("qOverlapEnd", BIGNUM);
tOverlapStart = optionInt("tOverlapStart", -BIGNUM);
tOverlapEnd = optionInt("tOverlapEnd", BIGNUM);

/* Synteny related settings; current values serve as defaults. */
doSyn = optionExists("syn");
minTopScore = optionFloat("minTopScore", minTopScore);
minSynScore = optionFloat("minSynScore", minSynScore);
minSynSize = optionFloat("minSynSize", minSynSize);
minSynAli = optionFloat("minSynAli", minSynAli);
maxFar = optionFloat("maxFar", maxFar);
doChimpSyn = optionExists("chimpSyn");
doNonSyn = optionExists("nonsyn");

/* Size limits and type selections. */
minGap = optionInt("minGap", minGap);
minAli = optionInt("minAli", minAli);
minSizeT = optionInt("minSizeT", minSizeT);
minSizeQ = optionInt("minSizeQ", minSizeQ);
fillOnly = optionExists("fill");
gapOnly = optionExists("gap");
types = optionMultiVal("type", types);
noRandom = optionExists("noRandom");
noHap = optionExists("noHap");

for (fileIx = 0; fileIx < inCount; ++fileIx)
    {
    struct lineFile *lf = lineFileOpen(inFiles[fileIx], TRUE);
    struct chainNet *net;

    if (lineMode)
        {
        netLineFilter(lf, out);
        lineFileClose(&lf);
        continue;
        }

    while ((net = chainNetRead(lf)) != NULL)
        {
        char *name = net->name;
        boolean notInT = (tHash != NULL && hashLookup(tHash, name) == NULL);
        boolean inNotT = (notTHash != NULL && hashLookup(notTHash, name) != NULL);
        boolean randomLike = (noRandom && (endsWith(name, "_random")
                                           || startsWith("chrUn", name)
                                           || sameWord("chrNA", name) /* danRer */
                                           || sameWord("chrU", name)));  /* dm */
        boolean hapLike = (noHap && stringIn("_hap",name));

        /* Any single exclusion reason is enough to drop the net. */
        if (!(notInT || inNotT || randomLike || hapLike))
            writeFiltered(net, out);
        chainNetFree(&net);
        }
    lineFileClose(&lf);
    }
}