void writeChainWhole(struct chain *chain, FILE *f, FILE *gapFile)
/* Emit the complete chain to f with chainWrite.  When gapFile is
 * non-NULL the chain is additionally handed to gapWrite for that file. */
{
chainWrite(chain, f);
if (gapFile == NULL)
    return;
gapWrite(chain, gapFile);
}
예제 #2
0
파일: chain.c 프로젝트: ma-compbio/RACA
void chainWriteAll(struct chain *chainList, FILE *f)
/* Walk the linked list that starts at chainList and pass every element,
 * in list order, to chainWrite for output on f. */
{
struct chain *cur = chainList;
while (cur != NULL)
    {
    chainWrite(cur, f);
    cur = cur->next;
    }
}
static void chainRecWrite(struct chromAnn *ca, FILE *fh, char term)
/* Output the record stored in a chromAnn as a chain on fh.  The only
 * terminator accepted is a newline; anything else trips the assert. */
{
struct chain *rec = ca->rec;
assert('\n' == term);
chainWrite(rec, fh);
}
void writeChainPart(struct chain *chain, int tStart, int tEnd, FILE *f,
	FILE *gapFile)
/* Write the piece of chain that chainSubsetOnT selects for tStart/tEnd
 * to f, and to gapFile too when one is supplied.  Whatever
 * chainSubsetOnT returns as "to free" is released before returning. */
{
struct chain *piece, *toFree;

chainSubsetOnT(chain, tStart, tEnd, &piece, &toFree);
assert(piece != NULL);

chainWrite(piece, f);
if (gapFile != NULL)
    {
    gapWrite(piece, gapFile);
    }

chainFree(&toFree);
}
void chainStitchId(char *inChain, char *outChain)
/* chainStitchId - Join chain fragments with the same chain ID into a single chain per ID.
 * Every chain read from inChain is stored in a table indexed by its id;
 * later chains with an id already present are merged into the stored one
 * with tackOnFrag.  Each merged chain has its blocks sorted with
 * cBlockCmpTarget, the chains are sorted with chainCmpScore, and the
 * result is written to outChain.  Aborts on a negative id or an id too
 * large for the table to be grown to. */
{
struct lineFile *lf = lineFileOpen(inChain, TRUE);
struct chain *chain = NULL, *chainList = NULL;
FILE *f = mustOpen(outChain, "w");
int idArrLen = 64 * 1024 * 1024;
struct chain **idArr = needLargeZeroedMem(idArrLen * sizeof(struct chain *));
int i=0;

/* Build up an array of chains, indexed by IDs.  Agglomerate chains with same 
 * ID as we go. */
while ((chain = chainRead(lf)) != NULL)
    {
    /* The id is used directly as an array index, so it must not be negative. */
    if (chain->id < 0)
	errAbort("negative chain id %d in %s", chain->id, inChain);
    while (chain->id >= idArrLen)
	{
	size_t oldBytes = idArrLen * sizeof(idArr[0]);
	int newLen, j;
	/* Doubling past this point would overflow the int length. */
	if (idArrLen >= (1 << 30))
	    errAbort("chain id %d in %s is too large to index", chain->id, inChain);
	newLen = idArrLen * 2;
	/* needMoreMem takes the old size in bytes, not in elements. */
	idArr = needMoreMem(idArr, oldBytes, newLen * sizeof(idArr[0]));
	/* The NULL test below relies on unused slots being NULL, so clear
	 * the newly added half explicitly. */
	for (j = idArrLen;  j < newLen;  j++)
	    idArr[j] = NULL;
	idArrLen = newLen;
	}
    if (idArr[chain->id] == NULL)
	idArr[chain->id] = chain;
    else
	{
	tackOnFrag(idArr[chain->id], chain);
	chainFree(&chain);
	}
    }
lineFileClose(&lf);

/* Clean up each agglomerated chain and add to head of list (but step 
 * backwards so the resulting list is in order by chain id). */
for (i = idArrLen-1;  i >= 0;  i--)
    {
    chain = idArr[i];
    if (chain != NULL)
	{
	slSort(&(chain->blockList), cBlockCmpTarget);
	slAddHead(&chainList, chain);
	}
    }

/* Ordering by original chain id gets us most of the way to sorting by 
 * score, but not all the way: sort and finally write out the chains. */
slSort(&chainList, chainCmpScore);
for (chain = chainList;  chain != NULL;  chain = chain->next)
    {
    chainWrite(chain, f);
    /* could free here, but program is about to end so why waste the time. */
    }
carefulClose(&f);
}
예제 #6
0
void chainMergeSort(int fileCount, char *files[], FILE *out, int level)
/* chainMergeSort - Combine sorted files into larger sorted file.
 * Each input contributes its current chain to a heap ordered by
 * cmpChainScores; the chain at the top of the heap is written to out and
 * replaced by the next chain from the same file.  Unless the saveId
 * global is set, chains are renumbered 1, 2, 3, ... in output order.
 * Metadata from every input is directed to out as well. */
{
struct quickHeap *heap = newQuickHeap(fileCount, &cmpChainScores);
struct chainFile *cf;
int lastId = 0;
int fileIx;

/* Open each input and prime the heap with the first chain of the file. */
for (fileIx = 0; fileIx < fileCount; fileIx++)
    {
    AllocVar(cf);
    cf->lf = lineFileOpen(files[fileIx], TRUE);
    lineFileSetMetaDataOutput(cf->lf, out);
    cf->chain = chainRead(cf->lf);
    if (cf->chain == NULL)
	cfEof(&cf,level);  /* file held no chains at all */
    else
	addToQuickHeap(heap, cf);
    }

/* Repeatedly emit the top chain and refill from the file it came from. */
while (!quickHeapEmpty(heap))
    {
    cf = peekQuickHeapTop(heap);
    if (!saveId)
	{
	lastId += 1;
	cf->chain->id = lastId;		/* ids are reassigned here */
	}
    chainWrite(cf->chain, out);
    chainFree(&cf->chain);

    cf->chain = chainRead(cf->lf);
    if (cf->chain != NULL)
	{
	quickHeapTopChanged(heap);
	continue;
	}

    /* This input is exhausted: drop it from the heap and finish it off. */
    if (!removeFromQuickHeapByElem(heap, cf))
	errAbort("unexpected error: chainFile not found on heap");
    cfEof(&cf,level);
    }

freeQuickHeap(&heap);
}
예제 #7
0
void chainSplit(char *outDir, int inCount, char *inFiles[])
/* chainSplit - Split chains up by target or query sequence.
 * Chains from all inFiles are appended to <outDir>/<name>.chain, where
 * name is the chain's qName when the splitOnQ global is set and its tName
 * otherwise, optionally mapped through lumpName when lump > 0.  Metadata
 * seen while reading is collected in <outDir>/meta.tmp; each output file
 * is seeded with the sorted, de-duplicated contents of that file at the
 * moment the output file is first created.
 *
 * The metadata file stays open for the whole run because it remains
 * registered as the metadata output of every lineFile; it is flushed
 * (not closed) before being copied, and closed once at the end. */
{
struct hash *hash = newHash(0);
int inIx;
char tpath[512];
FILE *meta ;
makeDir(outDir);
safef(tpath, sizeof(tpath), "%s/meta.tmp", outDir);
meta = mustOpen(tpath,"w");

for (inIx = 0; inIx < inCount; ++inIx)
    {
    struct lineFile *lf = lineFileOpen(inFiles[inIx], TRUE);
    struct chain *chain;
    FILE *f;
    lineFileSetMetaDataOutput(lf, meta);
    while ((chain = chainRead(lf)) != NULL)
        {
	char *name = (splitOnQ ? chain->qName : chain->tName);
	if (lump > 0)
	    name = lumpName(name);
	if ((f = hashFindVal(hash, name)) == NULL)
	    {
	    char path[512], cmd[512];
	    safef(path, sizeof(path),"%s/%s.chain", outDir, name);
	    /* Make buffered metadata visible to the external command
	     * without invalidating the handle the lineFile still holds. */
	    if (fflush(meta) != 0)
		errAbort("fflush failed on %s", tpath);
	    /* NOTE(review): outDir and name are interpolated into a shell
	     * command unescaped; names come from the input chain files. */
	    safef(cmd,sizeof(cmd), "cat %s | sort -u > %s", tpath, path);
	    mustSystem(cmd);
	    f = mustOpen(path, "a");
	    hashAdd(hash, name, f);
	    }
	chainWrite(chain, f);
	chainFree(&chain);
	}
    lineFileClose(&lf);
    }
carefulClose(&meta);
}
void chainPreNet(char *inFile, char *targetSizes, char *querySizes, 
	char *outFile)
/* chainPreNet - Remove chains that don't have a chance of being netted.
 * Reads chains from inFile, which must be ordered by non-increasing
 * score (the function aborts on the first chain that scores higher than
 * its predecessor).  A chain is copied to outFile only when both
 * chainUsed and inclQuery accept it.  Metadata from the input is passed
 * through to outFile.  Both files are closed before returning. */
{
struct hash *tHash = setupChroms(targetSizes);
struct hash *qHash = setupChroms(querySizes);
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");
struct chain *chain;
double score, lastScore = 9e99;	/* start high so the first chain always passes */
struct chrom *qChrom, *tChrom;

lineFileSetMetaDataOutput(lf, f);
while ((chain = chainRead(lf)) != NULL)
    {
    /* Report progress. */
    dotOut();

    /* Check to make sure it really is sorted by score. */
    score = chain->score;
    if (score > lastScore)
       {
       errAbort("%s not sorted by score line %d", 
       		lf->fileName, lf->lineIx);
       }
    lastScore = score;

    /* Output chain if necessary and then free it. */
    qChrom = hashMustFindVal(qHash, chain->qName);
    tChrom = hashMustFindVal(tHash, chain->tName);
    if (chainUsed(chain, qChrom, tChrom) && inclQuery(chain))
	{
	chainWrite(chain, f);
	}
    chainFree(&chain);
    }

/* Release the input and make sure the output is completely written. */
lineFileClose(&lf);
carefulClose(&f);
}
void chainStitch(char *psls, char *chains, char *outChainName, char *outFoundName, char *outNotFoundName)
/* chainStitch - Stitch psls into chains.
 * psls and chains are each grouped by the key "<qName><strand><tName>".
 * For every psl group, each psl is first offered to the chains built so
 * far for that group (checkInChains, checkBeforeChains); a psl those
 * calls leave on the list becomes a new chain with a fresh id and is
 * written to outFoundName.  The new chains are appended to the chain
 * group with the same key (created if absent), which is then sorted and
 * passed through aggregateChains.  Psls still attached to a group at the
 * end go to outNotFoundName, and all chains, with start/end recomputed
 * from their block lists, go to outChainName. */
{
int lastChainId = -1;
struct psl *nextPsl;
struct psl *fakePslList;
int deletedBases, addedBases;
FILE *outFound = mustOpen(outFoundName, "w");
FILE *outNotFound = mustOpen(outNotFoundName, "w");
FILE *outChains = mustOpen(outChainName, "w");
struct lineFile *chainsLf = lineFileOpen(chains, TRUE);
struct cseqPair *cspList = NULL, *csp;
struct seqPair *spList = NULL, *sp;
struct lineFile *pslLf = pslFileOpen(psls);
struct dyString *dy = newDyString(512);
struct psl *psl;
struct hash *pslHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct hash *chainHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct chain *chain, *chainList = NULL;
struct cBlock *block;
int count;

/* Read all psls, bucketing them by query/strand/target. */
count = 0;
while ((psl = pslNext(pslLf)) != NULL)
    {
    assert((psl->strand[1] == 0) || (psl->strand[1] == '+'));
    count++;
    dyStringClear(dy);
    dyStringPrintf(dy, "%s%c%s", psl->qName, psl->strand[0], psl->tName);
    sp = hashFindVal(pslHash, dy->string);
    if (sp == NULL)
        {
	AllocVar(sp);
	slAddHead(&spList, sp);
	hashAddSaveName(pslHash, dy->string, sp, &sp->name);
	sp->qName = cloneString(psl->qName);
	sp->tName = cloneString(psl->tName);
	sp->qStrand = psl->strand[0];
	}

    slAddHead(&sp->psl, psl);
    }
lineFileClose(&pslLf);
printf("read in  %d psls\n",count);

/* slAddHead reversed each bucket; restore input order. */
for(sp = spList; sp; sp = sp->next)
    slReverse(&sp->psl);

/* Read all chains, bucketing them the same way and tracking the largest
 * id so that new chains can be numbered after it. */
count = 0;
while ((chain = chainRead(chainsLf)) != NULL)
    {
    if (chain->id > lastChainId)
	lastChainId = chain->id;
    dyStringClear(dy);
    dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName);
    csp = hashFindVal(chainHash, dy->string);
    if (csp == NULL)
        {
	AllocVar(csp);
	slAddHead(&cspList, csp);
	hashAddSaveName(chainHash, dy->string, csp, &csp->name);
	csp->qName = cloneString(chain->qName);
	csp->tName = cloneString(chain->tName);
	csp->qStrand = chain->qStrand;
	}
    slAddHead(&csp->chain, chain);
    count++;
    }
lineFileClose(&chainsLf);
printf("read in %d chains\n",count);

for(csp = cspList; csp; csp = csp->next)
    {
    slSort(&csp->chain, chainCmpTarget);
    }

addedBases = deletedBases = 0;
for(sp = spList; sp; sp = sp->next)
    {
    /* Find the chain group that shares this psl group's key.  Both hashes
     * use the same key format.  Without this lookup csp would be whatever
     * the previous iteration (or the loop above) left behind. */
    csp = hashFindVal(chainHash, sp->name);

#ifdef NOTNOW
    if (csp != NULL)
	{
	/* first check to see if psl blocks are in any chains */
	checkInChains(&sp->psl, &csp->chain, outFound, &addedBases);

	/* now extend chains to the right */
	checkAfterChains(&sp->psl, &csp->chain, outFound, &addedBases);

	/* now extend chains to the left */
	slReverse(&sp->psl);
	checkBeforeChains(&sp->psl, &csp->chain, outFound, &addedBases);
	}
#endif

    /* do we still have psl's */
    if (sp->psl != NULL)
	{
	/* make sure we have a chainList */
	chainList = NULL;
	if (csp == NULL)
	    {
	    AllocVar(csp);
	    slAddHead(&cspList, csp);
	    csp->qName = cloneString(sp->psl->qName);
	    csp->tName = cloneString(sp->psl->tName);
	    csp->qStrand = sp->psl->strand[0];
	    dyStringClear(dy);
	    dyStringPrintf(dy, "%s%c%s", csp->qName, csp->qStrand, csp->tName);
	    hashAddSaveName(chainHash, dy->string, csp, &csp->name);
	    }
	for(psl = sp->psl; psl ;  psl = nextPsl)
	    {
	    /* this psl will either fit a chain or make a new one */

	    /* Detach the psl from the group so it can be offered on its own. */
	    nextPsl = psl->next;
	    sp->psl  = nextPsl;

	    psl->next = NULL;
	    fakePslList = psl;
	    if (chainList)
		checkInChains(&fakePslList, &chainList, outFound, &addedBases);
	    if (fakePslList == NULL)
		continue;	/* taken off the list by checkInChains */
	    if (chainList)
		checkBeforeChains(&fakePslList, &chainList, outFound, &addedBases);
	    if (fakePslList == NULL)
		continue;	/* taken off the list by checkBeforeChains */

	    /* Nothing took it: start a new chain spanning the psl's first
	     * through last block. */
	    AllocVar(chain);
	    chain->tStart = psl->tStarts[0];
	    chain->qStart = psl->qStarts[0];
	    chain->tEnd = psl->tStarts[psl->blockCount - 1] + psl->blockSizes[psl->blockCount - 1];
	    chain->qEnd = psl->qStarts[psl->blockCount - 1] + psl->blockSizes[psl->blockCount - 1];
	    chain->tSize = psl->tSize;
	    chain->qSize = psl->qSize;
	    chain->qName = cloneString(psl->qName);
	    chain->tName = cloneString(psl->tName);
	    chain->qStrand = psl->strand[0];
	    chain->id = ++lastChainId;

	    if (!addPslToChain(chain, psl, &addedBases))
		errAbort("new ");
	    slAddHead(&chainList, chain);

	    pslTabOut(psl, outFound);
	    freez(&psl);
	    }

	csp->chain = slCat(csp->chain, chainList);
	}

    /* csp is NULL only when this group had no psls left and no chain
     * group exists for its key; there is nothing to sort in that case. */
    if (csp != NULL)
	{
	slSort(&csp->chain, chainCmpTarget);
	csp->chain = aggregateChains(csp->chain);
	}
    }
fclose(outFound);
printf("deleted %d bases\n",deletedBases);
printf("added %d bases\n",addedBases);

/* Report psls that are still attached to a group. */
count = 0;
for(sp = spList; sp; sp = sp->next)
    for(psl = sp->psl ; psl ; psl = psl->next)
	{
	pslTabOut(psl, outNotFound);
	count++;
	}
fclose(outNotFound);

printf("%d psls remain\n",count);

/* Recompute each chain's bounds from its blocks (start from the first
 * block, end from the last) and write it out. */
for(csp = cspList; csp; csp = csp->next)
    for(chain = csp->chain ; chain ; chain = chain->next)
	{
	chain->tStart = chain->blockList->tStart;
	chain->qStart = chain->blockList->qStart;

	for(block = chain->blockList; block; block = block->next)
	    {
	    chain->tEnd = block->tEnd;
	    chain->qEnd = block->qEnd;
	    }
	chainWrite(chain,  outChains);
	}
fclose(outChains);

dyStringFree(&dy);
}
void liftChain(char *destFile, struct hash *liftHash, 
        int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .chain file.
 * Every chain of every source file is looked up in liftHash by its query
 * name (querySide true) or target name (querySide false).  A chain with
 * no lift spec is dropped unless the global how equals carryMissing, in
 * which case it is written unchanged.  Otherwise the chosen side is
 * renamed and resized to the spec's new sequence and its coordinates are
 * shifted (and, for a '-' strand spec, flipped).  Results and input
 * metadata go to destFile, which is closed before returning. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;
int dotMod = dots;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    char *source = sources[sourceIx];
    struct lineFile *lf = lineFileOpen(source, TRUE);
    struct chain *chain;
    lineFileSetMetaDataOutput(lf, f);
    verbose(1, "Lifting %s\n", source);
    while ((chain = chainRead(lf)) != NULL)
	{
	struct liftSpec *spec;
	char *seqName = querySide ? chain->qName : chain->tName;
	spec = findLift(liftHash, seqName, lf);
	if (spec == NULL)
	    {
	    /* No lift known: skip the chain unless missing ones are carried. */
	    if (how != carryMissing)
		{
		chainFree(&chain);
		continue;
		}
	    }
	else
	    {
	    struct cBlock *b = NULL;
	    int offset = spec->offset;
	    if (spec->strand == '-')
		{
		if (querySide)
		    {
		    /* Keep the query span and recompute only its start. */
		    int qSpan = chain->qEnd - chain->qStart;
		    if (chain->qStrand == '-')
		        chain->qStart += spec->offset;
		    else
		        {
			chain->qStart = spec->newSize - spec->offset 
				- (chain->qSize - chain->qStart);
			}
		    chain->qEnd = chain->qStart + qSpan;
		    chain->qStrand = flipStrand(chain->qStrand);
		    freeMem(chain->qName);
		    chain->qName = cloneString(spec->newName);
		    chain->qSize = spec->newSize;
		    /* We don't need to mess with the blocks here
		     * since they are all relative to the start. */
	            }
		else
		    {
		    /* We try and keep the target strand positive, so we end up
		     * flipping in both target and query and flipping the target
		     * strand. */
		    reverseIntRange(&chain->qStart, &chain->qEnd, chain->qSize);
		    reverseIntRange(&chain->tStart, &chain->tEnd, chain->tSize);
		    chain->qStrand = flipStrand(chain->qStrand);

		    /* Flip around blocks and add offset. */
		    for (b=chain->blockList;  b != NULL;  b=b->next)
			{
			reverseIntRange(&b->qStart, &b->qEnd, chain->qSize);
			reverseIntRange(&b->tStart, &b->tEnd, chain->tSize);
			b->tStart += offset;
			b->tEnd   += offset;
			}
		    /* Reversing coordinates reversed block order too. */
		    slReverse(&chain->blockList);

		    /* On target side add offset as well and update name and size. */
		    chain->tStart += offset;
		    chain->tEnd   += offset;
		    freeMem(chain->tName);
		    chain->tName = cloneString(spec->newName);
		    chain->tSize = spec->newSize;
		    }
		}
	    else
		{
		if (querySide)
		    {
		    /* For a '-' strand query the shift is taken from the
		     * other end of the new sequence. */
		    if (chain->qStrand == '-')
			offset = spec->newSize - (spec->offset + spec->oldSize);
		    freeMem(chain->qName);
		    chain->qName = cloneString(spec->newName);
		    chain->qSize = spec->newSize;
		    chain->qStart += offset;
		    chain->qEnd   += offset;
		    for (b=chain->blockList;  b != NULL;  b=b->next)
			{
			b->qStart += offset;
			b->qEnd   += offset;
			}
		    }
		else
		    {
		    freeMem(chain->tName);
		    chain->tName = cloneString(spec->newName);
		    chain->tSize = spec->newSize;
		    chain->tStart += offset;
		    chain->tEnd   += offset;
		    for (b=chain->blockList;  b != NULL;  b=b->next)
			{
			b->tStart += offset;
			b->tEnd   += offset;
			}
		    }
		}
	    }
	chainWrite(chain, f);
	chainFree(&chain);
	doDots(&dotMod);
	}
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
/* Close the destination so buffered output is written and checked. */
carefulClose(&f);
}
void axtChain(char *axtIn, char *tNibDir, char *qNibDir, char *chainOut)
/* axtChain - Chain together axt alignments.
 * Alignment blocks are read from axtIn (as psl when the "psl" option is
 * present, otherwise as axt) and grouped into sequence pairs.  For each
 * pair, exact duplicate blocks are removed, the query and target
 * sequences are loaded (from fasta when "faQ"/"faT" is given, in which
 * case qNibDir/tNibDir name the fasta file; otherwise via loadIfNewSeq)
 * and chainPair builds chains.  The chains are sorted with chainCmpScore
 * and written to chainOut after the score scheme header.  When the
 * global detailsName is set, a details file is opened, passed to
 * chainPair, and closed at the end. */
{
struct hash *pairHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct seqPair *spList = NULL, *sp;
FILE *f = mustOpen(chainOut, "w");
char *qName = "",  *tName = "";
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char qStrand = 0, tStrand = 0;
struct chain *chainList = NULL, *chain;
FILE *details = NULL;
struct dnaSeq *seq, *seqList = NULL;
struct hash *faHash = newHash(0);
struct hash *tFaHash = newHash(0);
FILE *faF;
boolean qIsTwoBit = twoBitIsFile(qNibDir);
boolean tIsTwoBit = twoBitIsFile(tNibDir);

axtScoreSchemeDnaWrite(scoreScheme, f, "axtChain");

if (detailsName != NULL)
    details = mustOpen(detailsName, "w");
/* Read input file and divide alignments into various parts. */
if (optionExists("psl"))
    spList = readPslBlocks(axtIn, pairHash, f);
else
    spList = readAxtBlocks(axtIn, pairHash, f);

/* Optionally preload all query sequences from a fasta file. */
if (optionExists("faQ"))
    {
    faF = mustOpen(qNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(faHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
/* Optionally preload all target sequences from a fasta file. */
if (optionExists("faT"))
    {
    faF = mustOpen(tNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(tFaHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
for (sp = spList; sp != NULL; sp = sp->next)
    {
    slReverse(&sp->blockList);
    removeExactOverlaps(&sp->blockList);
    verbose(1, "%d blocks after duplicate removal\n", slCount(sp->blockList));
    if (optionExists("faQ"))
        {
        assert (faHash != NULL);
        loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
        }
    else
	{
        loadIfNewSeq(qNibDir, qIsTwoBit, sp->qName, sp->qStrand, 
		&qName, &qSeq, &qStrand);
        }
    if (optionExists("faT"))
        {
        assert (tFaHash != NULL);
        loadFaSeq(tFaHash, sp->tName, '+', &tName, &tSeq, &tStrand);
        }
    else 
	{
        loadIfNewSeq(tNibDir, tIsTwoBit, sp->tName, '+', 
		&tName, &tSeq, &tStrand);
	}
    chainPair(sp, qSeq, tSeq, &chainList, details);
    }
slSort(&chainList, chainCmpScore);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    /* A chain must begin exactly where its first block begins. */
    assert(chain->qStart == chain->blockList->qStart 
	&& chain->tStart == chain->blockList->tStart);
    chainWrite(chain, f);
    }

/* carefulClose is a no-op on a NULL handle, so this is safe when no
 * details file was requested. */
carefulClose(&details);
carefulClose(&f);
}
void doChainScore(char *chainIn, char *tNibDir, char *qNibDir, char *chainOut)
/* Read the chains in chainIn, group them by "<qName><qStrand><tName>",
 * and for each group load the query and target sequences and call
 * scorePair, which contributes to the output chain list.  The resulting
 * chains are sorted with chainCmpScore and written to chainOut.  With
 * the "faQ" option, qNibDir names a fasta file from which all query
 * sequences are preloaded; otherwise sequences come from loadIfNewSeq. */
{
char qStrand = 0, tStrand = 0;
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char *qName = "",  *tName = "";
FILE *f = mustOpen(chainOut, "w");
struct chain *chainList = NULL, *chain;
struct dnaSeq *seq, *seqList = NULL;
struct hash *faHash = newHash(0);
struct hash *chainHash = newHash(0);
FILE *faF;
struct seqPair *spList = NULL, *sp;
struct dyString *dy = newDyString(512);
struct lineFile *chainsLf = lineFileOpen(chainIn, TRUE);

/* Bucket the input chains by query/strand/target. */
while ((chain = chainRead(chainsLf)) != NULL)
    {
    dyStringClear(dy);
    dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName);
    sp = hashFindVal(chainHash, dy->string);
    if (sp == NULL)
        {
	AllocVar(sp);
	slAddHead(&spList, sp);
	hashAddSaveName(chainHash, dy->string, sp, &sp->name);
	sp->qName = cloneString(chain->qName);
	sp->tName = cloneString(chain->tName);
	sp->qStrand = chain->qStrand;
	}
    slAddHead(&sp->chain, chain);
    }
slSort(&spList, seqPairCmp);
lineFileClose(&chainsLf);
/* The key buffer is only needed while bucketing. */
dyStringFree(&dy);

if (optionExists("faQ"))
    {
    faF = mustOpen(qNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(faHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
for (sp = spList; sp != NULL; sp = sp->next)
    {
    if (optionExists("faQ"))
        {
        assert (faHash != NULL);
        loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
        }
    else
        loadIfNewSeq(qNibDir, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
    loadIfNewSeq(tNibDir, sp->tName, '+', &tName, &tSeq, &tStrand);
    scorePair(sp, qSeq, tSeq, &chainList, sp->chain);
    }


slSort(&chainList, chainCmpScore);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    /* A chain must begin exactly where its first block begins. */
    assert(chain->qStart == chain->blockList->qStart 
	&& chain->tStart == chain->blockList->tStart);
    chainWrite(chain, f);
    }

carefulClose(&f);
}