void netToAxt(char *netName, char *chainName, char *tNibDir, char *qNibDir, char *axtName)
/* netToAxt - Convert net (and chain) to axt.
 *   netName   - net file; scanned once by findUsedIds() and then read
 *               net by net with chainNetRead()
 *   chainName - chain file, loaded through chainReadUsedSwap() together
 *               with the used-id bits
 *   tNibDir   - where target sequence is loaded from, once per net
 *   qNibDir   - where query sequence is loaded from, via a nibTwoCache
 *   axtName   - output file, opened for writing
 * If the "gapOut" option is set, a second output file of that name is
 * opened and handed to rConvert() alongside the axt output.
 * Aborts if the loaded target sequence size differs from the net size. */
{
Bits *usedBits = findUsedIds(netName);
struct hash *chainHash;
struct chainNet *net;
struct lineFile *lf = lineFileOpen(netName, TRUE);
FILE *f = mustOpen(axtName, "w");
struct dnaSeq *tChrom = NULL;
struct nibTwoCache *qNtc = nibTwoCacheNew(qNibDir);
char *gapFileName = optionVal("gapOut", NULL);
FILE *gapFile = NULL;

if (gapFileName)
    gapFile = mustOpen(gapFileName, "w");
lineFileSetMetaDataOutput(lf, f);
chainHash = chainReadUsedSwap(chainName, qChain, usedBits);
bitFree(&usedBits);	/* Only needed while loading the chains. */
while ((net = chainNetRead(lf)) != NULL)
    {
    verbose(1, "Processing %s\n", net->name);
    tChrom = nibTwoLoadOne(tNibDir, net->name);
    if (tChrom->size != net->size)
        errAbort("Size mismatch on %s.  Net/nib out of sync or possibly nib dirs swapped?", 
                tChrom->name);
    rConvert(net->fillList, tChrom, qNtc, qNibDir, chainHash, f, gapFile);
    freeDnaSeq(&tChrom);
    chainNetFree(&net);
    }
nibTwoCacheFree(&qNtc);

/* Release the input before the outputs: f is registered as the meta data
 * output of lf, so lf must not outlive it.  Closing the outputs explicitly
 * lets a failed final flush be reported instead of silently lost at exit. */
lineFileClose(&lf);
if (gapFile != NULL)
    carefulClose(&gapFile);
carefulClose(&f);
}
struct seqPair *readAxtBlocks(char *fileName, struct hash *pairHash, FILE *f)
/* Read all alignments from the axt file fileName and collect their blocks
 * in seqPair records, one per distinct (query name, query strand, target
 * name) combination.  A pair that is not yet in pairHash is created, added
 * to the hash and to the returned list.  Meta data of the input is directed
 * to f, with the unique-meta-data mode switched on.
 * Returns the list of pairs created by this call, sorted with seqPairCmp. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct dyString *key = newDyString(512);
struct seqPair *pairList = NULL;

lineFileSetMetaDataOutput(lf, f);
lineFileSetUniqueMetaData(lf);
for (;;)
    {
    struct seqPair *pair;
    struct axt *axt = axtRead(lf);
    if (axt == NULL)
        break;

    /* Hash key is <qName><qStrand><tName>. */
    dyStringClear(key);
    dyStringPrintf(key, "%s%c%s", axt->qName, axt->qStrand, axt->tName);

    pair = hashFindVal(pairHash, key->string);
    if (pair == NULL)
        {
        /* First alignment for this combination: set up a new pair. */
        AllocVar(pair);
        slAddHead(&pairList, pair);
        hashAddSaveName(pairHash, key->string, pair, &pair->name);
        pair->qName = cloneString(axt->qName);
        pair->tName = cloneString(axt->tName);
        pair->qStrand = axt->qStrand;
        }

    axtAddBlocksToBoxInList(&pair->blockList, axt);
    pair->axtCount += 1;
    axtFree(&axt);
    }
lineFileClose(&lf);
dyStringFree(&key);

slSort(&pairList, seqPairCmp);
return pairList;
}
/* Example #3 */
struct hash *chainReadAllWithMeta(char *fileName, FILE *f)
/* Load the chains of fileName into a hash (keyed by id, per the original
 * author's note) and return it.  Meta data encountered while reading is
 * directed to f.  The input file is closed before returning. */
{
struct hash *chainHash;
struct lineFile *lf = lineFileOpen(fileName, TRUE);

lineFileSetMetaDataOutput(lf, f);
chainHash = chainReadUsedSwapLf(fileName, FALSE, NULL, lf);
lineFileClose(&lf);
return chainHash;
}
void liftNet(char *destFile, struct hash *liftHash, 
        int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .net file.
 *   destFile    - output file, opened for writing
 *   liftHash    - lift specifications, looked up by sequence name
 *   sourceCount - number of entries in sources
 *   sources     - input .net files, processed in order
 *   querySide   - if TRUE lift the query side of every fill, otherwise
 *                 lift the target side of the net as a whole
 * On the target side a net without a lift spec is dropped unless the
 * global 'how' is carryMissing, in which case it is written unchanged. */
{

    FILE *f = mustOpen(destFile, "w");
    int sourceIx;
    int dotMod = dots;

    for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
        {
        char *source = sources[sourceIx];
        struct lineFile *lf = lineFileOpen(source, TRUE);
        struct chainNet *net;
        lineFileSetMetaDataOutput(lf, f);
        verbose(1, "Lifting %s\n", source);
        while ((net = chainNetRead(lf)) != NULL)
            {
            if (querySide)
                {
                /* liftFillsQ fills in a fresh name hash, which then
                 * replaces the one that came with the net. */
                struct hash *newNameHash = hashNew(6);
                liftFillsQ(net->fillList, newNameHash, liftHash, lf);
                hashFree(&(net->nameHash));
                net->nameHash = newNameHash;
                }
            else
                {
                struct liftSpec *spec = findLift(liftHash, net->name, lf);
                if (spec == NULL)
                    {
                    /* No spec: skip the net unless asked to carry it. */
                    if (how != carryMissing)
                        {
                        chainNetFree(&net);
                        continue;
                        }
                    }
                else
                    {
                    freeMem(net->name);
                    net->name = cloneString(spec->newName);
                    net->size = spec->newSize;
                    liftFillsT(net->fillList, spec);
                    }
                }
            chainNetWrite(net, f);
            chainNetFree(&net);
            doDots(&dotMod);
            }
        lineFileClose(&lf);
        if (dots)
            verbose(1, "\n");
        }

    /* Close the output explicitly so a failed final flush is reported
     * rather than silently lost at process exit. */
    carefulClose(&f);
}
/* Example #5 */
void chainMergeSort(int fileCount, char *files[], FILE *out, int level)
/* chainMergeSort - Combine sorted files into larger sorted file.
 *   fileCount - number of entries in files
 *   files     - names of the input chain files
 *   out       - destination for the merged chains and for input meta data
 *   level     - passed through to cfEof() when an input is exhausted
 * Inputs are kept on a heap ordered by cmpChainScores; the chain on top is
 * written and replaced by the next chain from the same file.  Unless the
 * global saveId is set, chains are renumbered 1, 2, 3... in output order. */
{
struct quickHeap *heap = newQuickHeap(fileCount, &cmpChainScores);
struct chainFile *cf;
int nextId = 0;
int fileIx;

/* Prime the heap with the first chain of every input file. */
for (fileIx = 0; fileIx < fileCount; fileIx++)
    {
    AllocVar(cf);
    cf->lf = lineFileOpen(files[fileIx], TRUE);
    lineFileSetMetaDataOutput(cf->lf, out);
    cf->chain = chainRead(cf->lf);
    if (cf->chain == NULL)
        cfEof(&cf, level);	/* Empty input: deal with EOF right away. */
    else
        addToQuickHeap(heap, cf);
    }

/* Repeatedly emit the top chain and refill from the file it came from. */
while (!quickHeapEmpty(heap))
    {
    cf = peekQuickHeapTop(heap);
    if (!saveId)
        {
        nextId += 1;
        cf->chain->id = nextId;	/* We reset id's here. */
        }
    chainWrite(cf->chain, out);
    chainFree(&cf->chain);

    cf->chain = chainRead(cf->lf);
    if (cf->chain != NULL)
        {
        /* Same heap element, new chain: let the heap reorder itself. */
        quickHeapTopChanged(heap);
        continue;
        }

    /* This input is exhausted: take it off the heap and deal with EOF. */
    if (!removeFromQuickHeapByElem(heap, cf))
        errAbort("unexpected error: chainFile not found on heap");
    cfEof(&cf, level);
    }

freeQuickHeap(&heap);
}
void axtRescore(char *in, char *out)
/* axtRescore - Recalculate scores in axt.
 *   in  - axt file to read
 *   out - axt file to write
 * The output starts with the score scheme written by
 * axtScoreSchemeDnaWrite(); every alignment is then rescored with the
 * global scoreScheme and written out.  Meta data of the input is directed
 * to the output file. */
{
struct lineFile *lf = lineFileOpen(in, TRUE);
FILE *f = mustOpen(out, "w");
struct axt *axt;

lineFileSetMetaDataOutput(lf, f);
axtScoreSchemeDnaWrite(scoreScheme, f, "axtRescore");
while ((axt = axtRead(lf)) != NULL)
    {
    axt->score = axtScore(axt, scoreScheme);
    axtWrite(axt, f);
    axtFree(&axt);
    }

/* Input first (f is its meta data output), then the output, so that a
 * failed final flush is reported instead of being lost at exit. */
lineFileClose(&lf);
carefulClose(&f);
}
/* Example #7 */
void chainSplit(char *outDir, int inCount, char *inFiles[])
/* chainSplit - Split chains up by target or query sequence.
 *   outDir  - directory that receives the output; created with makeDir()
 *   inCount - number of entries in inFiles
 *   inFiles - input chain files, processed in order
 * Each chain goes to <outDir>/<name>.chain, where name is the query or
 * target name (depending on the global splitOnQ), optionally mapped through
 * lumpName() when the global lump is positive.  Meta data from the inputs
 * is collected in <outDir>/meta.tmp; whenever a new output file is started
 * it is seeded with the sorted, de-duplicated meta data collected so far.
 * Output files stay open for the whole run and are not closed here.
 * NOTE: outDir and the sequence name are placed into a shell command line
 * unquoted. */
{
struct hash *hash = newHash(0);
int inIx;
char tpath[512];
FILE *meta;

makeDir(outDir);
safef(tpath, sizeof(tpath), "%s/meta.tmp", outDir);
meta = mustOpen(tpath,"w");

for (inIx = 0; inIx < inCount; ++inIx)
    {
    struct lineFile *lf = lineFileOpen(inFiles[inIx], TRUE);
    struct chain *chain;
    FILE *f;
    lineFileSetMetaDataOutput(lf, meta);
    while ((chain = chainRead(lf)) != NULL)
        {
        char *name = (splitOnQ ? chain->qName : chain->tName);
        if (lump > 0)
            name = lumpName(name);
        if ((f = hashFindVal(hash, name)) == NULL)
            {
            char path[512], cmd[512];
            safef(path, sizeof(path),"%s/%s.chain", outDir, name);
            /* meta is still registered as the meta data output of this
             * and of later inputs, so it has to stay open.  Flushing is
             * enough to make what was collected so far visible to the
             * shell command below. */
            fflush(meta);
            safef(cmd,sizeof(cmd), "cat %s | sort -u > %s", tpath, path);
            mustSystem(cmd);
            /* Append mode keeps the header lines just written. */
            f = mustOpen(path, "a");
            hashAdd(hash, name, f);
            }
        chainWrite(chain, f);
        chainFree(&chain);
        }
    lineFileClose(&lf);
    }

/* All inputs are closed, nothing refers to meta any more. */
fclose(meta);
}
void chainPreNet(char *inFile, char *targetSizes, char *querySizes, 
	char *outFile)
/* chainPreNet - Remove chains that don't have a chance of being netted.
 *   inFile      - chain file to read; must be sorted by descending score
 *   targetSizes - passed to setupChroms() to build the target chrom hash
 *   querySizes  - passed to setupChroms() to build the query chrom hash
 *   outFile     - chain file to write
 * A chain is written when both chainUsed() and inclQuery() accept it.
 * Aborts if a chain scores higher than its predecessor, or if a chain
 * names a sequence missing from the corresponding hash. */
{
struct hash *tHash = setupChroms(targetSizes);
struct hash *qHash = setupChroms(querySizes);
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");
struct chain *chain;
double score, lastScore = 9e99;	/* Start above any real score. */
struct chrom *qChrom, *tChrom;

lineFileSetMetaDataOutput(lf, f);
while ((chain = chainRead(lf)) != NULL)
    {
    /* Report progress. */
    dotOut();

    /* Check to make sure it really is sorted by score. */
    score = chain->score;
    if (score > lastScore)
        {
        errAbort("%s not sorted by score line %d", 
                lf->fileName, lf->lineIx);
        }
    lastScore = score;

    /* Output chain if necessary and then free it. */
    qChrom = hashMustFindVal(qHash, chain->qName);
    tChrom = hashMustFindVal(tHash, chain->tName);
    if (chainUsed(chain, qChrom, tChrom) && inclQuery(chain))
        {
        chainWrite(chain, f);
        }
    chainFree(&chain);
    }

/* Input first (f is its meta data output), then the output, so that a
 * failed final flush is reported instead of being lost at exit. */
lineFileClose(&lf);
carefulClose(&f);
}
void liftChain(char *destFile, struct hash *liftHash, 
        int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .chain file.
 *   destFile    - output file, opened for writing
 *   liftHash    - lift specifications, looked up by sequence name
 *   sourceCount - number of entries in sources
 *   sources     - input .chain files, processed in order
 *   querySide   - if TRUE lift the query side, otherwise the target side
 * A chain without a lift spec is dropped unless the global 'how' is
 * carryMissing, in which case it is written unchanged. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;
int dotMod = dots;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    char *source = sources[sourceIx];
    struct lineFile *lf = lineFileOpen(source, TRUE);
    struct chain *chain;
    lineFileSetMetaDataOutput(lf, f);
    verbose(1, "Lifting %s\n", source);
    while ((chain = chainRead(lf)) != NULL)
        {
        struct liftSpec *spec;
        char *seqName = querySide ? chain->qName : chain->tName;
        spec = findLift(liftHash, seqName, lf);
        if (spec == NULL)
            {
            /* No spec: skip the chain unless asked to carry it. */
            if (how != carryMissing)
                {
                chainFree(&chain);
                continue;
                }
            }
        else
            {
            struct cBlock *b = NULL;
            int offset = spec->offset;
            if (spec->strand == '-')
                {
                if (querySide)
                    {
                    /* Keep the span, recompute the start, flip the strand. */
                    int qSpan = chain->qEnd - chain->qStart;
                    if (chain->qStrand == '-')
                        chain->qStart += spec->offset;
                    else
                        {
                        chain->qStart = spec->newSize - spec->offset 
                                - (chain->qSize - chain->qStart);
                        }
                    chain->qEnd = chain->qStart + qSpan;
                    chain->qStrand = flipStrand(chain->qStrand);
                    freeMem(chain->qName);
                    chain->qName = cloneString(spec->newName);
                    chain->qSize = spec->newSize;
                    /* We don't need to mess with the blocks here
                     * since they are all relative to the start. */
                    }
                else
                    {
                    /* We try and keep the target strand positive, so we end up
                     * flipping in both target and query and flipping the target
                     * strand. */
                    reverseIntRange(&chain->qStart, &chain->qEnd, chain->qSize);
                    reverseIntRange(&chain->tStart, &chain->tEnd, chain->tSize);
                    chain->qStrand = flipStrand(chain->qStrand);

                    /* Flip around blocks and add offset. */
                    for (b=chain->blockList;  b != NULL;  b=b->next)
                        {
                        reverseIntRange(&b->qStart, &b->qEnd, chain->qSize);
                        reverseIntRange(&b->tStart, &b->tEnd, chain->tSize);
                        b->tStart += offset;
                        b->tEnd   += offset;
                        }
                    slReverse(&chain->blockList);

                    /* On target side add offset as well and update name and size. */
                    chain->tStart += offset;
                    chain->tEnd   += offset;
                    freeMem(chain->tName);
                    chain->tName = cloneString(spec->newName);
                    chain->tSize = spec->newSize;
                    }
                }
            else
                {
                if (querySide)
                    {
                    /* For a minus strand query the offset is measured from
                     * the other end of the new sequence. */
                    if (chain->qStrand == '-')
                        offset = spec->newSize - (spec->offset + spec->oldSize);
                    freeMem(chain->qName);
                    chain->qName = cloneString(spec->newName);
                    chain->qSize = spec->newSize;
                    chain->qStart += offset;
                    chain->qEnd   += offset;
                    for (b=chain->blockList;  b != NULL;  b=b->next)
                        {
                        b->qStart += offset;
                        b->qEnd   += offset;
                        }
                    }
                else
                    {
                    freeMem(chain->tName);
                    chain->tName = cloneString(spec->newName);
                    chain->tSize = spec->newSize;
                    chain->tStart += offset;
                    chain->tEnd   += offset;
                    for (b=chain->blockList;  b != NULL;  b=b->next)
                        {
                        b->tStart += offset;
                        b->tEnd   += offset;
                        }
                    }
                }
            }
        chainWrite(chain, f);
        chainFree(&chain);
        doDots(&dotMod);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }

/* Close the output explicitly so a failed final flush is reported
 * rather than silently lost at process exit. */
carefulClose(&f);
}
void liftAxt(char *destFile, struct hash *liftHash, 
	int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .axt file.
 *   destFile    - output file, opened for writing
 *   liftHash    - lift specifications, looked up by sequence name
 *   sourceCount - number of entries in sources
 *   sources     - input .axt files, processed in order
 *   querySide   - if TRUE lift the query side, otherwise the target side
 * An alignment without a lift spec is dropped unless the global 'how' is
 * carryMissing, in which case it is written unchanged.  Every spec that is
 * found is passed through cantHandleSpecRevStrand() first. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;
int dotMod = dots;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    char *source = sources[sourceIx];
    struct lineFile *lf = lineFileOpen(source, TRUE);
    struct axt *axt;
    lineFileSetMetaDataOutput(lf, f);
    verbose(1, "Lifting %s\n", source);
    while ((axt = axtRead(lf)) != NULL)
        {
        struct liftSpec *spec;
        /* Work on a shallow copy: the lifted name is only borrowed from
         * the spec, and the untouched original is what goes to axtFree(). */
        struct axt a = *axt;
        char *seqName;
        if (querySide)
            seqName = a.qName;
        else
            seqName = a.tName;
        spec = findLift(liftHash, seqName, lf);
        if (spec == NULL)
            {
            /* No spec: skip the alignment unless asked to carry it. */
            if (how != carryMissing)
                {
                axtFree(&axt);
                continue;
                }
            }
        else
            {
            int offset;
            char strand = (querySide ? a.qStrand : a.tStrand);
            cantHandleSpecRevStrand(spec);
            if (strand == '-')
                {
                /* Minus strand: offset counts from the far end of the
                 * new sequence. */
                int ctgEnd = spec->offset + spec->oldSize;
                offset = spec->newSize - ctgEnd;
                }
            else
                offset = spec->offset;
            if (querySide)
                {
                a.qStart += offset;
                a.qEnd += offset;
                a.qName = spec->newName;
                }
            else
                {
                a.tStart += offset;
                a.tEnd += offset;
                a.tName = spec->newName;
                if (strand == '-')
                    warn("Target minus strand, please double check results.");
                }
            }
        axtWrite(&a, f);
        axtFree(&axt);
        doDots(&dotMod);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }

/* Close the output explicitly so a failed final flush is reported
 * rather than silently lost at process exit. */
carefulClose(&f);
}
/* Example #11 */
void chainNet(char *chainFile, char *tSizes, char *qSizes, 
	char *tNet, char *qNet)
/* chainNet - Make alignment nets out of chains.
 *   chainFile - input chains; must be sorted by descending score
 *   tSizes    - passed to makeChroms() to set up the target chromosomes
 *   qSizes    - passed to makeChroms() to set up the query chromosomes
 *   tNet      - output net file for the target side
 *   qNet      - output net file for the query side
 * Reading stops at the first chain scoring below the global minScore.
 * Aborts if the input is not sorted, if a chain names a sequence that is
 * missing from the size hashes, or if its size disagrees with them. */
{
struct lineFile *lf = lineFileOpen(chainFile, TRUE);
struct hash *qHash, *tHash;
struct chrom *qChromList, *tChromList, *tChrom, *qChrom;
struct chain *chain;
double lastScore = -1;	/* Negative means no chain seen yet. */
struct lm *lm = lmInit(0);
struct rbTreeNode **rbStack;
FILE *tNetFile = mustOpen(tNet, "w");
FILE *qNetFile = mustOpen(qNet, "w");


lmAllocArray(lm, rbStack, 256);
makeChroms(qSizes, lm, rbStack, &qHash, &qChromList);
makeChroms(tSizes, lm, rbStack, &tHash, &tChromList);
verbose(1, "Got %d chroms in %s, %d in %s\n", slCount(tChromList), tSizes,
       slCount(qChromList), qSizes);
lineFileSetMetaDataOutput(lf, tNetFile);
lineFileSetMetaDataOutput(lf, qNetFile);

/* Loop through chain file building up net. */
while ((chain = chainRead(lf)) != NULL)
    {
    /* Make sure that input is really sorted. */
    if (lastScore >= 0 && chain->score > lastScore)
        errAbort("%s must be sorted in order of score", chainFile);
    lastScore = chain->score;

    if (chain->score < minScore) 
        {
        break;
        }
    verbose(2, "chain %f (%d els) %s %d-%d %c %s %d-%d\n", 
            chain->score, slCount(chain->blockList), 
            chain->tName, chain->tStart, chain->tEnd, 
            chain->qStrand, chain->qName, chain->qStart, chain->qEnd);
    qChrom = hashMustFindVal(qHash, chain->qName);
    if (qChrom->size != chain->qSize)
        errAbort("%s is %d in %s but %d in %s", chain->qName, 
                chain->qSize, chainFile,
                qChrom->size, qSizes);
    tChrom = hashMustFindVal(tHash, chain->tName);
    if (tChrom->size != chain->tSize)
        errAbort("%s is %d in %s but %d in %s", chain->tName, 
                chain->tSize, chainFile,
                tChrom->size, tSizes);
    if (!inclQuery(chain))
        verbose(2, "skipping chain on query %s\n", chain->qName);
    else
        {
        addChain(qChrom, tChrom, chain);
        verbose(2, "%s has %d inserts, %s has %d\n", tChrom->name, 
                tChrom->spaces->n, qChrom->name, qChrom->spaces->n);
        }
    }
/* Build up other side of fills.  It's just for historical 
 * reasons this is not done during the main build up.   
 * It's a little less efficient this way, but to change it
 * some hard reverse strand issues would have to be juggled. */
verbose(1, "Finishing nets\n");
finishNet(qChromList, TRUE);
finishNet(tChromList, FALSE);

/* Write out basic net files. */
verbose(1, "writing %s\n", tNet);
outputNetSide(tChromList, tNetFile, FALSE);
verbose(1, "writing %s\n", qNet);
outputNetSide(qChromList, qNetFile, TRUE);

/* prevent SIGPIPE in preceding process if input is a pipe, consume remainder
 * of input file since we stop before EOF. */
if (isPipe(lf->fd))
    {
    char *line;
    while(lineFileNext(lf, &line, NULL))
        continue;
    }
lineFileClose(&lf);

/* The net files are registered as meta data outputs of lf, which is still
 * being read in the drain loop above, so they can only be closed now.
 * Closing explicitly lets a failed final flush be reported. */
carefulClose(&tNetFile);
carefulClose(&qNetFile);

if (verboseLevel() > 1)
    printMem(stderr);
}