Exemplo n.º 1
0
struct hash *allChainsHash(char *fileName)
/* Read every chain in fileName and return a hash keyed by the chain id
 * formatted in hexadecimal.  Each value is an indexedChain holding the chain
 * itself plus a range tree with one range per block, spanning the block's
 * tStart..tEnd and carrying the block as its value.  The index structures
 * are allocated from the hash's own local memory (hash->lm). */
{
struct hash *idHash = newHash(18);
struct lineFile *in = lineFileOpen(fileName, TRUE);
struct lm *pool = idHash->lm;
struct rbTreeNode **treeStack;
struct chain *cur;
char hexId[20];

/* The same stack array is handed to every range tree created below. */
lmAllocArray(pool, treeStack, 128);
for (cur = chainRead(in); cur != NULL; cur = chainRead(in))
    {
    struct indexedChain *entry;
    struct cBlock *blk;

    lmAllocVar(pool, entry);
    entry->chain = cur;
    entry->blockTree = rangeTreeNewDetailed(pool, treeStack);
    for (blk = cur->blockList; blk != NULL; blk = blk->next)
        {
        struct range *node = rangeTreeAdd(entry->blockTree, blk->tStart, blk->tEnd);
        node->val = blk;
        }
    safef(hexId, sizeof(hexId), "%x", cur->id);
    hashAddUnique(idHash, hexId, entry);
    }
lineFileClose(&in);
return idHash;
}
Exemplo n.º 2
0
struct hash *readChainToBinKeeper(char *sizeFileName, char *fileName)
/* Build a hash, keyed by sequence name, of binKeepers holding the chains
 * read from fileName.
 *   sizeFileName - two column file: <name> <size>.  Each new name gets a
 *                  binKeeper spanning 0..size; a repeated name is reported
 *                  with warn() and only its first entry is used.
 *   fileName     - chain file.  Each chain is added to the binKeeper found
 *                  under its tName (looked up with hashMustFindVal) over
 *                  the range tStart..tEnd.
 * The chains are stored in the binKeepers and are not freed here. */
{
struct binKeeper *bk;
struct chain *chain;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct lineFile *sf = lineFileOpen(sizeFileName, TRUE);
struct hash *hash = newHash(0);
char *chromRow[2];

while (lineFileRow(sf, chromRow))
    {
    char *name = chromRow[0];
    int size = lineFileNeedNum(sf, chromRow, 1);

    if (hashLookup(hash, name) != NULL)
        warn("Duplicate %s, ignoring all but first\n", name);
    else
        {
        /* Check the size before it is used to build the binKeeper. */
        assert(size > 1);
        bk = binKeeperNew(0, size);
        hashAdd(hash, name, bk);
        }
    }
/* The size file is fully consumed; release it (it used to be leaked). */
lineFileClose(&sf);

while ((chain = chainRead(lf)) != NULL)
    {
    bk = hashMustFindVal(hash, chain->tName);
    binKeeperAdd(bk, chain->tStart, chain->tEnd, chain);
    }
lineFileClose(&lf);
return hash;
}
Exemplo n.º 3
0
void chainToPsl(char *inName, char *tSizeFile, char *qSizeFile,  char *targetList, char *queryList, char *outName)
/* chainToPsl - Convert chain file to psl format.
 * Sizes are loaded from tSizeFile/qSizeFile with readSizes(), the sequence
 * file lists targetList/queryList are scanned with hashFileList(), and each
 * chain read from inName is passed to aliStringToPsl(), which is given the
 * output file opened on outName. */
{
struct hash *targetSizes = readSizes(tSizeFile);
struct hash *querySizes = readSizes(qSizeFile);
struct lineFile *in = lineFileOpen(inName, TRUE);
FILE *out = mustOpen(outName, "w");
struct hash *seenFiles = newHash(0);    /* No value. */
struct hash *targetSeqs = newHash(20);  /* seqFilePos value. */
struct hash *querySeqs = newHash(20);   /* seqFilePos value. */
struct dlList *openFiles = newDlList();
struct chain *cur;

verbose(1, "Scanning %s\n", targetList);
hashFileList(targetList, seenFiles, targetSeqs);
verbose(1, "Scanning %s\n", queryList);
hashFileList(queryList, seenFiles, querySeqs);
verbose(1, "Converting %s\n", inName);

for (cur = chainRead(in); cur != NULL; cur = chainRead(in))
    {
    int tSpan = cur->tEnd - cur->tStart;
    int qSpan = cur->qEnd - cur->qStart;

    /* The looked-up sizes are not used below; the calls are kept because
     * they are made for every chain in the original code.
     * NOTE(review): presumably findSize() complains about unknown names -
     * confirm. */
    (void)findSize(querySizes, cur->qName);
    (void)findSize(targetSizes, cur->tName);

    aliStringToPsl(in, cur->qName, cur->tName, cur->qSize, cur->tSize,
        min(tSpan, qSpan), cur->qStart, cur->qEnd, cur->tStart, cur->tEnd,
        cur->qStrand, out, cur, targetSeqs, querySeqs, openFiles);
    chainFree(&cur);
    }
lineFileClose(&in);
carefulClose(&out);
}
Exemplo n.º 4
0
struct hash *chainReadUsedSwapLf(char *fileName, boolean swapQ, Bits *bits, struct lineFile *lf)
/* Read chains from lf into a hash keyed by the chain id in hexadecimal.
 * When bits is non-NULL only chains whose id bit is set are kept; the rest
 * are freed.  When swapQ is set each kept chain is passed through
 * chainSwap() before being stored.  A repeated id aborts.
 * fileName is not referenced here; error messages use lf->fileName. */
{
char hexId[16];
struct hash *byId = hashNew(18);
struct chain *cur;

for (cur = chainRead(lf); cur != NULL; cur = chainRead(lf))
    {
    boolean wanted = (bits == NULL || bitReadOne(bits, cur->id));

    if (wanted)
        {
        safef(hexId, sizeof(hexId), "%x", cur->id);
        if (hashLookup(byId, hexId))
            errAbort("Duplicate chain %d ending line %d of %s", 
                    cur->id, lf->lineIx, lf->fileName);
        if (swapQ)
            chainSwap(cur);
        hashAdd(byId, hexId, cur);
        }
    else
        {
        chainFree(&cur);
        }
    }
return byId;
}
Exemplo n.º 5
0
void chainIndex(char *inChain, char *outIndex)
/* chainIndex - Create simple two column file index for chain.
 * Whenever the tName changes from one chain to the next, a line
 * "<pos>\t<tName>" is written to outIndex, where pos (in hex) is the
 * lineFileTell() value recorded right after the previous chain was read
 * (0 for the first chain).  Aborts if a tName shows up again after a
 * different tName intervened, i.e. the input is not grouped by target. */
{
struct lineFile *lf = lineFileOpen(inChain, TRUE);
FILE *f = mustOpen(outIndex, "w");
struct chain *chain, *lastChain = NULL;
long pos = 0;
struct hash *uniqHash = hashNew(16);

while ((chain = chainRead(lf)) != NULL)
    {
    if (lastChain == NULL || !sameString(chain->tName, lastChain->tName))
        {
        /* lastChain cannot be NULL inside the error branch: the hash is
         * still empty while the first chain is being processed. */
        if (hashLookup(uniqHash, chain->tName))
            {
            errAbort("%s is not sorted, %s repeated with intervening %s", 
                inChain, chain->tName, lastChain->tName);
            }
        hashAddInt(uniqHash, chain->tName, pos);
        fprintf(f, "%lx\t%s\n", pos, chain->tName);
        }
    chainFree(&lastChain);
    lastChain = chain;
    pos = lineFileTell(lf);
    }

/* Release everything that used to be left open/allocated; closing the
 * output through carefulClose also surfaces write errors. */
chainFree(&lastChain);
hashFree(&uniqHash);
lineFileClose(&lf);
carefulClose(&f);
}
Exemplo n.º 6
0
void chainMergeSort(int fileCount, char *files[], FILE *out, int level)
/* chainMergeSort - Combine sorted files into larger sorted file.
 * Every input gets a chainFile record holding its lineFile and the chain
 * most recently read from it.  Records live on a quickHeap ordered by
 * cmpChainScores; the top record's chain is written to out, the next chain
 * is read from that file, and the heap is adjusted.  Unless the saveId flag
 * is set, ids are renumbered 1,2,3,... in output order.  Exhausted inputs
 * are handed to cfEof() together with level. */
{
struct quickHeap *heap = newQuickHeap(fileCount, &cmpChainScores);
struct chainFile *src;
int fileIx;
int lastId = 0;

/* Prime the heap with the first chain of each input. */
for (fileIx = 0; fileIx < fileCount; ++fileIx)
    {
    AllocVar(src);
    src->lf = lineFileOpen(files[fileIx], TRUE);
    lineFileSetMetaDataOutput(src->lf, out);
    src->chain = chainRead(src->lf);
    if (src->chain == NULL)
        cfEof(&src,level);  /* deal with EOF */
    else
        addToQuickHeap(heap, src);
    }

/* Repeatedly emit the chain at the top of the heap. */
while (!quickHeapEmpty(heap))
    {
    src = peekQuickHeapTop(heap);
    if (!saveId)
        {
        /* We reset id's here. */
        lastId += 1;
        src->chain->id = lastId;
        }
    chainWrite(src->chain, out);
    chainFree(&src->chain);

    src->chain = chainRead(src->lf);
    if (src->chain != NULL)
        {
        quickHeapTopChanged(heap);
        }
    else
        {
        /* deal with EOF */
        if (!removeFromQuickHeapByElem(heap, src))
            errAbort("unexpected error: chainFile not found on heap");
        cfEof(&src,level);
        }
    }

freeQuickHeap(&heap);
}
static struct mappingCnts *cntChains(char *chainFile)
/* Pass every chain in chainFile to cntChain() and return the resulting
 * mappingCnts object. */
{
struct mappingCnts *totals = mappingCntsNew();
struct lineFile *in = lineFileOpen(chainFile, TRUE);
struct chain *cur;

for (cur = chainRead(in); cur != NULL; cur = chainRead(in))
    {
    cntChain(totals, cur);
    }
lineFileClose(&in);
return totals;
}
void chainStitchId(char *inChain, char *outChain)
/* chainStitchId - Join chain fragments with the same chain ID into a single chain per ID.
 * Chains from inChain are collected in an array indexed by chain id; a
 * fragment whose id is already present is merged into the stored chain with
 * tackOnFrag() and freed.  The merged chains then get their blocks sorted by
 * cBlockCmpTarget, are sorted by chainCmpScore and are written to outChain.
 * NOTE(review): chain ids are used directly as array indexes, so they are
 * assumed to be non-negative - confirm chainRead() guarantees that. */
{
struct lineFile *lf = lineFileOpen(inChain, TRUE);
struct chain *chain = NULL, *chainList = NULL;
FILE *f = mustOpen(outChain, "w");
int idArrLen = 64 * 1024 * 1024;
struct chain **idArr = needLargeZeroedMem(idArrLen * sizeof(struct chain *));
int i=0;

/* Build up an array of chains, indexed by IDs.  Agglomerate chains with same 
 * ID as we go. */
while ((chain = chainRead(lf)) != NULL)
    {
    while (chain->id >= idArrLen)
        {
        /* needMoreMem() takes the number of BYTES to carry over, not the
         * number of elements; passing the element count kept only a
         * fraction of the stored pointers when the array grew. */
        size_t oldBytes = (size_t)idArrLen * sizeof(idArr[0]);
        int newLen = idArrLen * 2;
        int j;

        idArr = needMoreMem(idArr, oldBytes, 2 * oldBytes);
        /* Do not rely on the allocator zeroing the new half: the NULL
         * test below depends on it. */
        for (j = idArrLen;  j < newLen;  j++)
            idArr[j] = NULL;
        idArrLen = newLen;
        }
    if (idArr[chain->id] == NULL)
        idArr[chain->id] = chain;
    else
        {
        tackOnFrag(idArr[chain->id], chain);
        chainFree(&chain);
        }
    }
lineFileClose(&lf);

/* Clean up each agglomerated chain and add to head of list (but step 
 * backwards so the resulting list is in order by chain id). */
for (i = idArrLen-1;  i >= 0;  i--)
    {
    chain = idArr[i];
    if (chain != NULL)
        {
        slSort(&(chain->blockList), cBlockCmpTarget);
        slAddHead(&chainList, chain);
        }
    }

/* Ordering by original chain id gets us most of the way to sorting by 
 * score, but not all the way: sort and finally write out the chains. */
slSort(&chainList, chainCmpScore);
for (chain = chainList;  chain != NULL;  chain = chain->next)
    {
    chainWrite(chain, f);
    /* could free here, but program is about to end so why waste the time. */
    }
carefulClose(&f);
}
static struct chromBins* loadMapChains(char *chainFile)
/* read a chain file, convert to mapAln object and chromBins by query locations. */
{
struct chromBins* mapAlns = chromBinsNew((chromBinsFreeFunc*)pslFree);
struct chain *ch;
struct lineFile *chLf = lineFileOpen(chainFile, TRUE);
while ((ch = chainRead(chLf)) != NULL)
    {
    struct mapAln *mapAln = chainToPsl(ch);
    chromBinsAdd(mapAlns, mapAln->psl->qName, mapAln->psl->qStart, mapAln->psl->qEnd, mapAln);
    chainFree(&ch);
    }
lineFileClose(&chLf);
return mapAlns;
}
void chainStats(char *chains)
/* Read all chains from the file named by chains, group them by the key
 * "<qName><qStrand><tName>", print how many were read, then sort each
 * group with chainCmpTarget and pass it to gapChains().
 * The grouped chains and the hash are not freed here. */
{
struct lineFile *chainsLf = lineFileOpen(chains, TRUE);
struct cseqPair *cspList = NULL, *csp;
struct dyString *dy = newDyString(512);
struct hash *chainHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct chain *chain;
int count = 0;

while ((chain = chainRead(chainsLf)) != NULL)
    {
    dyStringClear(dy);
    dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName);
    csp = hashFindVal(chainHash, dy->string);
    if (csp == NULL)
        {
        /* First chain for this query/strand/target combination. */
        AllocVar(csp);
        slAddHead(&cspList, csp);
        hashAddSaveName(chainHash, dy->string, csp, &csp->name);
        csp->qName = cloneString(chain->qName);
        csp->tName = cloneString(chain->tName);
        csp->qStrand = chain->qStrand;
        }
    slAddHead(&csp->chain, chain);
    count++;
    }
lineFileClose(&chainsLf);
printf("read in %d chains\n",count);

/* The empty per-chain/per-block loops that used to follow gapChains() did
 * no work and have been removed, together with the unused id tracking. */
for(csp = cspList; csp; csp = csp->next)
    {
    slSort(&csp->chain, chainCmpTarget);
    gapChains(csp->chain);
    }

dyStringFree(&dy);
}
Exemplo n.º 11
0
struct hash *allChainsHash(char *fileName)
/* Read every chain in fileName into a hash keyed by the chain id written
 * in decimal.  The chains are owned by the returned hash's values; ids must
 * be unique because hashAddUnique() is used. */
{
struct hash *byId = newHash(0);
struct lineFile *in = lineFileOpen(fileName, TRUE);
struct chain *cur;
char idText[128];

for (cur = chainRead(in); cur != NULL; cur = chainRead(in))
    {
    safef(idText, sizeof(idText), "%d", cur->id);
    hashAddUnique(byId, idText, cur);
    }
lineFileClose(&in);
return byId;
}
Exemplo n.º 12
0
void checkIds(char *inputFileName, char *outputFileName)
/* Report duplicate chain ids.
 * Every id seen in inputFileName is recorded in idHash; an id seen again is
 * recorded once in duplicateHash.  The chain count and the number of
 * duplicated ids are reported with verbose(1, ...), and each duplicated id
 * is written on its own line to outputFileName.
 * idHash and duplicateHash are declared outside this function and are left
 * populated on return. */
{
struct chain *chainEl;
struct lineFile *lf = lineFileOpen(inputFileName, TRUE);
FILE *outputFileHandle = NULL;
char idString[64];
char *dupName = NULL;
int chainCount = 0;
int dupCount = 0;
struct hashCookie cookie;

idHash = newHash(0);
duplicateHash = newHash(0);
while ((chainEl = chainRead(lf)) != NULL)
    {
    chainCount++;
    safef(idString, sizeof(idString), "%d", chainEl->id);
    /* The key is passed straight in: the hash keeps its own copy of the
     * name, so the cloneString() copies made before were never freed. */
    if (hashLookup(idHash, idString) == NULL)
        hashAdd(idHash, idString, NULL);
    else if (hashLookup(duplicateHash, idString) == NULL)
        hashAdd(duplicateHash, idString, NULL);
    /* Only the id is needed, so the chain can go right away. */
    chainFree(&chainEl);
    }
lineFileClose(&lf);
verbose(1, "chain count = %d\n", chainCount);

/* print contents of duplicateHash */
outputFileHandle = mustOpen(outputFileName, "w");
cookie = hashFirst(duplicateHash);
while ((dupName = hashNextName(&cookie)) != NULL)
    {
    dupCount++;
    fprintf(outputFileHandle, "%s\n", dupName);
    }
verbose(1, "count of duplicate IDs = %d\n", dupCount);
carefulClose(&outputFileHandle);
}
Exemplo n.º 13
0
struct hash *qSizeHash(char *chainfile)
/* Scan chainfile and return a hash mapping each query sequence name to the
 * qSize of the first chain that mentions it. */
{
    struct hash *sizes = hashNew(10);
    struct lineFile *in = lineFileOpen(chainfile, TRUE);
    struct chain *cur;

    for (cur = chainRead(in); cur != NULL; cur = chainRead(in))
    {
        /* Later chains on an already recorded query are ignored. */
        if (hashLookup(sizes, cur->qName) == NULL)
            hashAddInt(sizes, cur->qName, cur->qSize);
        chainFree(&cur);
    }
    lineFileClose(&in);
    return sizes;
}
void doIt(char *inName, char *tNibDirOr2bit, char *qNibDirOr2bit, char *outName)
/* chainToAxt - Convert from chain to axt file.
 * Each chain read from inName whose score is at least minScore is handed to
 * doAChain() along with the target and query sequence caches and the output
 * file opened on outName; every chain is freed afterwards. */
{
struct lineFile *in = lineFileOpen(inName, TRUE);
struct nibTwoCache *targetCache = nibTwoCacheNew(tNibDirOr2bit);
struct nibTwoCache *queryCache = nibTwoCacheNew(qNibDirOr2bit);
FILE *out = mustOpen(outName, "w");
struct chain *cur;

for (cur = chainRead(in); cur != NULL; cur = chainRead(in))
    {
    if (!(cur->score < minScore))
        doAChain(cur, targetCache, queryCache, out);
    chainFree(&cur);
    }
lineFileClose(&in);
carefulClose(&out);
}
Exemplo n.º 15
0
void chainSplit(char *outDir, int inCount, char *inFiles[])
/* chainSplit - Split chains up by target or query sequence. */
{
struct hash *hash = newHash(0);
int inIx;
char tpath[512];
FILE *meta ;
bool metaOpen = TRUE;
makeDir(outDir);
safef(tpath, sizeof(tpath), "%s/meta.tmp", outDir);
meta = mustOpen(tpath,"w");

for (inIx = 0; inIx < inCount; ++inIx)
    {
    struct lineFile *lf = lineFileOpen(inFiles[inIx], TRUE);
    struct chain *chain;
    FILE *f;
    lineFileSetMetaDataOutput(lf, meta);
    while ((chain = chainRead(lf)) != NULL)
        {
	char *name = (splitOnQ ? chain->qName : chain->tName);
	if (lump > 0)
	    name = lumpName(name);
	if ((f = hashFindVal(hash, name)) == NULL)
	    {
	    char path[512], cmd[512];
	    safef(path, sizeof(path),"%s/%s.chain", outDir, name);
            if (metaOpen)
                fclose(meta);
            metaOpen = FALSE;
	    safef(cmd,sizeof(cmd), "cat %s | sort -u > %s", tpath, path);
            mustSystem(cmd);
	    f = mustOpen(path, "a");
	    hashAdd(hash, name, f);
	    }
	chainWrite(chain, f);
	chainFree(&chain);
	}
    lineFileClose(&lf);
    }
}
Exemplo n.º 16
0
struct hash *readLiftOverMapChainHash(char *fileName)
/* taken from kent/src/hg/lib/liftOver.c */
/* Read the chain file into a hash keyed by target sequence name.  Each value
 * is a liftOverChromMap whose binKeeper spans 0..tSize of the first chain
 * seen for that target and holds every chain on it by tStart..tEnd. */
{
    struct hash *byTarget = hashNew(10);
    struct lineFile *in = lineFileOpen(fileName, TRUE);
    struct chain *cur;

    for (cur = chainRead(in); cur != NULL; cur = chainRead(in))
    {
        struct liftOverChromMap *chromMap = hashFindVal(byTarget, cur->tName);

        if (chromMap == NULL)
        {
            /* First chain on this target: create its map. */
            AllocVar(chromMap);
            chromMap->bk = binKeeperNew(0, cur->tSize);
            hashAddSaveName(byTarget, cur->tName, chromMap, &chromMap->name);
        }
        binKeeperAdd(chromMap->bk, cur->tStart, cur->tEnd, cur);
    }
    lineFileClose(&in);
    return byTarget;
}
static struct chromAnn* chromAnnChainReaderRead(struct chromAnnReader *car)
/* Read the next chain from the reader's lineFile and build a chromAnn from
 * it; returns NULL when no chain is left.
 * With chromAnnUseQSide the annotation is made on the query sequence with
 * strand '+', otherwise on the target sequence with the chain's qStrand.
 * With chromAnnRange a single block covering the chain's start..end on the
 * chosen side is added, otherwise addChainQBlocks()/addChainTBlocks() is
 * called.  The chain is attached to the chromAnn only when
 * chromAnnSaveLines is set; otherwise it is freed before returning. */
{
struct chromAnnChainReader *reader = car->data;
struct chain *rec = chainRead(reader->lf);
if (rec == NULL)
    return NULL;

int useQSide = ((car->opts & chromAnnUseQSide) != 0);
int keepRecord = ((car->opts & chromAnnSaveLines) != 0);
struct chain *kept = keepRecord ? rec : NULL;
struct chromAnn *ann;

if (useQSide)
    ann = chromAnnNew(rec->qName, '+', rec->tName,
                      kept, chainRecWrite, chainRecFree);
else
    ann = chromAnnNew(rec->tName, rec->qStrand, rec->qName,
                      kept, chainRecWrite, chainRecFree);

if (car->opts & chromAnnRange)
    {
    int start = useQSide ? rec->qStart : rec->tStart;
    int end = useQSide ? rec->qEnd : rec->tEnd;
    chromAnnBlkNew(ann, start, end);
    }
else if (useQSide)
    addChainQBlocks(ann, car->opts, rec);
else
    addChainTBlocks(ann, car->opts, rec);

chromAnnFinish(ann);
if (!keepRecord)
    chainFree(&rec);
return ann;
}
void chainPreNet(char *inFile, char *targetSizes, char *querySizes, 
	char *outFile)
/* chainPreNet - Remove chains that don't have a chance of being netted.
 * Chains are read from inFile, which must be sorted by descending score
 * (an increase in score aborts).  A chain is copied to outFile when both
 * chainUsed() and inclQuery() accept it.  Metadata from the input is
 * forwarded to the output file. */
{
struct hash *tHash = setupChroms(targetSizes);
struct hash *qHash = setupChroms(querySizes);
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");
struct chain *chain;
double score, lastScore = 9e99;
struct chrom *qChrom, *tChrom;

lineFileSetMetaDataOutput(lf, f);
while ((chain = chainRead(lf)) != NULL)
    {
    /* Report progress. */
    dotOut();

    /* Check to make sure it really is sorted by score. */
    score = chain->score;
    if (score > lastScore)
       {
       errAbort("%s not sorted by score line %d", 
                lf->fileName, lf->lineIx);
       }
    lastScore = score;

    /* Output chain if necessary and then free it. */
    qChrom = hashMustFindVal(qHash, chain->qName);
    tChrom = hashMustFindVal(tHash, chain->tName);
    if (chainUsed(chain, qChrom, tChrom) && inclQuery(chain))
        {
        chainWrite(chain, f);
        }
    chainFree(&chain);
    }

/* Close the reader first (f is its metadata output), then the output so
 * that write errors are reported instead of being silently dropped. */
lineFileClose(&lf);
carefulClose(&f);
}
Exemplo n.º 19
0
void fbOrChain(Bits *acc, char *track, char *chrom, int chromSize)
/* Or in a chain file.
 * The file name is derived from track and chrom by chromFileName(); if that
 * file does not exist nothing happens.  Otherwise the target range of every
 * block of every chain is set in acc.  Blocks starting below 0 or ending
 * past chromSize are reported through outOfRange(). */
{
struct lineFile *lf;
char fileName[512];
struct chain *chain;
struct cBlock *b;

chromFileName(track, chrom, fileName);
if (!fileExists(fileName))
    return;
lf = lineFileOpen(fileName, TRUE);
while ((chain = chainRead(lf)) != NULL)
    {
    for (b = chain->blockList; b != NULL; b = b->next)
        {
        int s = b->tStart, e = b->tEnd;
        if (s < 0) outOfRange(lf, chrom, chromSize);
        if (e > chromSize) outOfRange(lf, chrom, chromSize);
        bitSetRange(acc, s, e - s);
        }
    chainFree(&chain);
    }
/* The reader used to be left open on every call. */
lineFileClose(&lf);
}
void liftChain(char *destFile, struct hash *liftHash, 
        int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .chain file.
 *   destFile    - output chain file; metadata from the sources is forwarded
 *                 to it as well.
 *   liftHash    - passed to findLift() to look up the liftSpec for a
 *                 sequence name.
 *   sources     - sourceCount input chain files, processed in order.
 *   querySide   - lift the query side when TRUE, the target side otherwise.
 * A chain without a liftSpec is written unchanged when the file-level 'how'
 * setting equals carryMissing and is dropped otherwise. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;
int dotMod = dots;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    char *source = sources[sourceIx];
    struct lineFile *lf = lineFileOpen(source, TRUE);
    struct chain *chain;
    lineFileSetMetaDataOutput(lf, f);
    verbose(1, "Lifting %s\n", source);
    while ((chain = chainRead(lf)) != NULL)
        {
        struct liftSpec *spec;
        char *seqName = querySide ? chain->qName : chain->tName;
        spec = findLift(liftHash, seqName, lf);
        if (spec == NULL)
            {
            if (how != carryMissing)
                {
                chainFree(&chain);
                continue;
                }
            }
        else
            {
            struct cBlock *b = NULL;
            int offset = spec->offset;
            if (spec->strand == '-')
                {
                if (querySide)
                    {
                    int qSpan = chain->qEnd - chain->qStart;
                    if (chain->qStrand == '-')
                        chain->qStart += spec->offset;
                    else
                        {
                        chain->qStart = spec->newSize - spec->offset 
                                - (chain->qSize - chain->qStart);
                        }
                    chain->qEnd = chain->qStart + qSpan;
                    chain->qStrand = flipStrand(chain->qStrand);
                    freeMem(chain->qName);
                    chain->qName = cloneString(spec->newName);
                    chain->qSize = spec->newSize;
                    /* We don't need to mess with the blocks here
                     * since they are all relative to the start. */
                    }
                else
                    {
                    /* We try and keep the target strand positive, so we end up
                     * flipping in both target and query and flipping the target
                     * strand. */
                    reverseIntRange(&chain->qStart, &chain->qEnd, chain->qSize);
                    reverseIntRange(&chain->tStart, &chain->tEnd, chain->tSize);
                    chain->qStrand = flipStrand(chain->qStrand);

                    /* Flip around blocks and add offset. */
                    for (b=chain->blockList;  b != NULL;  b=b->next)
                        {
                        reverseIntRange(&b->qStart, &b->qEnd, chain->qSize);
                        reverseIntRange(&b->tStart, &b->tEnd, chain->tSize);
                        b->tStart += offset;
                        b->tEnd   += offset;
                        }
                    slReverse(&chain->blockList);

                    /* On target side add offset as well and update name and size. */
                    chain->tStart += offset;
                    chain->tEnd   += offset;
                    freeMem(chain->tName);
                    chain->tName = cloneString(spec->newName);
                    chain->tSize = spec->newSize;
                    }
                }
            else
                {
                if (querySide)
                    {
                    /* A minus-strand query is offset from the far end of
                     * the new sequence. */
                    if (chain->qStrand == '-')
                        offset = spec->newSize - (spec->offset + spec->oldSize);
                    freeMem(chain->qName);
                    chain->qName = cloneString(spec->newName);
                    chain->qSize = spec->newSize;
                    chain->qStart += offset;
                    chain->qEnd   += offset;
                    for (b=chain->blockList;  b != NULL;  b=b->next)
                        {
                        b->qStart += offset;
                        b->qEnd   += offset;
                        }
                    }
                else
                    {
                    freeMem(chain->tName);
                    chain->tName = cloneString(spec->newName);
                    chain->tSize = spec->newSize;
                    chain->tStart += offset;
                    chain->tEnd   += offset;
                    for (b=chain->blockList;  b != NULL;  b=b->next)
                        {
                        b->tStart += offset;
                        b->tEnd   += offset;
                        }
                    }
                }
            }
        chainWrite(chain, f);
        chainFree(&chain);
        doDots(&dotMod);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
/* All readers are closed; close the output so write errors are reported
 * (it used to be left open). */
carefulClose(&f);
}
void doChainScore(char *chainIn, char *tNibDir, char *qNibDir, char *chainOut)
/* Read the chains in chainIn, group them by "<qName><qStrand><tName>",
 * hand each group to scorePair() together with the loaded query and target
 * sequences, then sort the chains scorePair() collected by chainCmpScore and
 * write them to chainOut.
 * When the "faQ" option is present, qNibDir is opened as a fasta file and
 * the query sequences are taken from it via loadFaSeq(); otherwise they are
 * loaded with loadIfNewSeq(qNibDir, ...).  Target sequences always come from
 * loadIfNewSeq(tNibDir, ...). */
{
char qStrand = 0, tStrand = 0;
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char *qName = "",  *tName = "";
FILE *f = mustOpen(chainOut, "w");
struct chain *chainList = NULL, *chain;
struct dnaSeq *seq, *seqList = NULL;
struct hash *faHash = newHash(0);
struct hash *chainHash = newHash(0);
FILE *faF;
struct seqPair *spList = NULL, *sp;
struct dyString *dy = newDyString(512);
struct lineFile *chainsLf = lineFileOpen(chainIn, TRUE);

/* Group the chains by query/strand/target. */
while ((chain = chainRead(chainsLf)) != NULL)
    {
    dyStringClear(dy);
    dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName);
    sp = hashFindVal(chainHash, dy->string);
    if (sp == NULL)
        {
        AllocVar(sp);
        slAddHead(&spList, sp);
        hashAddSaveName(chainHash, dy->string, sp, &sp->name);
        sp->qName = cloneString(chain->qName);
        sp->tName = cloneString(chain->tName);
        sp->qStrand = chain->qStrand;
        }
    slAddHead(&sp->chain, chain);
    }
slSort(&spList, seqPairCmp);
lineFileClose(&chainsLf);
dyStringFree(&dy);

if (optionExists("faQ"))
    {
    faF = mustOpen(qNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(faHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
for (sp = spList; sp != NULL; sp = sp->next)
    {
    if (optionExists("faQ"))
        {
        assert (faHash != NULL);
        loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
        }
    else
        loadIfNewSeq(qNibDir, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
    loadIfNewSeq(tNibDir, sp->tName, '+', &tName, &tSeq, &tStrand);
    scorePair(sp, qSeq, tSeq, &chainList, sp->chain);
    }


slSort(&chainList, chainCmpScore);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    /* A chain must start exactly where its first block starts. */
    assert(chain->qStart == chain->blockList->qStart 
        && chain->tStart == chain->blockList->tStart);
    chainWrite(chain, f);
    }

carefulClose(&f);
}
Exemplo n.º 22
0
int main(int argc, char *argv[])
/* Usage: prog chain_file interval_text [features_gff_file]
 * For every "chr beg end" line of interval_text, every chain of chain_file
 * is restricted to target range beg..end with chainSubsetOnT(); the query
 * side of each non-empty result is recorded (unless it is flagged by
 * is_repeats() against the optional gff features).  The recorded intervals
 * are sorted, de-duplicated with remove_redundant_intervals() and printed. */
{
	FILE *f;
	struct chain *Chain;
	struct chain *SubChain = NULL, *chainToFree = NULL;
	struct chain *ch_p, *next_p;
	char buf[NUM_CHARS];
	struct lineFile *lf;
	int i = 0;
	int b = 0, e = 0;
	bool is_null = true;
	struct exons_list *homologs;
	int num_chains = 0;
	int num_intervals = 0;
	int num_homologs = 0;
	/* Must start as NULL: without a gff argument nothing is allocated but
	 * the pointer is still passed to free() at the end. */
	struct exons_list *repeats = NULL;
	int num_repeats = 0;
	char chr[LEN_NAME];

	strcpy(chr, "");
	if( (argc != 3) && (argc != 4) ) {
		fatal("args: chain_file interval_text features_gff_file\n");
	}
	else {
		/* The first line of the interval file supplies the chromosome
		 * name used to filter the gff features. */
		if( (f = ckopen(argv[2], "r")) ) {
			if( fgets(buf, NUM_CHARS, f) ) {
				if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) {
					fatalf("format errors: chr beg end in %s", buf);
				}
			}
			else {
				fatalf("%s is empty\n", argv[2]);
			}
			fclose(f);
		}

		if( argc == 4 ) {
			if( (f = ckopen(argv[3], "r")) ) {
				/* Count lines to size the array, then rewind and load. */
				i = 0;
				while(fgets(buf, NUM_CHARS, f)) {
					i++;
				}
				num_repeats = i;
				repeats = (struct exons_list *) ckalloc(num_repeats * sizeof(struct exons_list));
				init_exons(repeats, 0, num_repeats-1);	
				fseek(f, 0, SEEK_SET);
				assign_gff_exons_chr(f, repeats, num_repeats, chr);
				quick_sort_inc_exons(repeats, 0, num_repeats-1, POS_BASE);
				fclose(f);
			}
			else {
				/* argv[3] is the gff file; argv[4] would be NULL here. */
				fatalf("file %s invalid\n", argv[3]);
			}
		}
	}

	/* Read all chains into one singly linked list. */
	lf = lineFileOpen(argv[1], true);
	Chain = chainRead(lf);
	ch_p = Chain;
	while( (ch_p != NULL) && ((next_p = chainRead(lf)) != NULL) ) {
		ch_p->next = next_p;
		ch_p = ch_p->next;
	}

	i = 0;
	ch_p = Chain;
	while( ch_p != NULL  ) {
		ch_p = ch_p->next;
		i++;
	}
	num_chains = i;

	/* Each interval line can contribute at most one entry per chain, so
	 * the result array has to hold num_chains entries per line; sizing
	 * it for a single line overflowed with multi-line interval files. */
	f = ckopen(argv[2], "r");
	while( fgets(buf, NUM_CHARS, f) ) {
		num_intervals++;
	}
	if( fseek(f, 0, SEEK_SET) != 0 ) {
		fatalf("cannot rewind %s\n", argv[2]);
	}
	if( num_intervals < 1 ) num_intervals = 1;
	homologs = (struct exons_list *) ckalloc((size_t)num_chains * (size_t)num_intervals * sizeof(struct exons_list));

	i = 0;
	while( fgets(buf, NUM_CHARS, f) ) { 	
		if( sscanf(buf, "%*s %d %d", &b, &e) != 2 ) {
			fatalf("format errors: chr beg end in %s", buf);
		}
		else {
			/* Start every interval from a clean state; otherwise the
			 * SubChain found for an earlier line (possibly already
			 * freed) would be reused. */
			is_null = true;
			SubChain = NULL;
			chainToFree = NULL;
			ch_p = Chain;

			/* Advance to the first chain overlapping the interval. */
			while( (ch_p != NULL) && (is_null == true) ) {
				chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree);
				if( SubChain != NULL ) is_null = false;
				ch_p = ch_p->next;
			}

			if( is_null == false ) {
				/* NOTE(review): unlike the loop below, this first match
				 * does not convert minus-strand query coordinates
				 * (qSize - qEnd, qSize - qStart).  Kept as in the
				 * original; confirm whether that is intended. */
				if( (num_repeats == 0 ) || (is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false) ) {
					homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd);
					homologs[i].dir = SubChain->qStrand;
					strcpy(homologs[i].chr, SubChain->qName);
					i++;
				}
				if( chainToFree != NULL ) {
					chainFree(&chainToFree);
				}

				/* Remaining chains after the first match. */
				while( ch_p != NULL ) {
					chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree);
					ch_p = ch_p->next;
					if( SubChain != NULL ) {
						if( (num_repeats == 0 ) || ( is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false )) {
							if( SubChain->qStrand == '-' ) {
								homologs[i].reg = assign_I(SubChain->qSize - SubChain->qEnd, SubChain->qSize - SubChain->qStart);
							}
							else {
								homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd);
							}
							homologs[i].dir = SubChain->qStrand;
							strcpy(homologs[i].chr, SubChain->qName);
							i++;
						}
						if( chainToFree != NULL ) {
							chainFree(&chainToFree);
						}
					}
				}
			}
		}
	}

	num_homologs = i;
	selection_sort_exons(homologs, num_homologs);
	num_homologs = remove_redundant_intervals(homologs, num_homologs);
	print_exons_list(homologs, num_homologs);
	free(homologs);
	free(repeats);
	chainFreeList(&Chain);

	fclose(f);
	lineFileClose(&lf);

	return EXIT_SUCCESS;
}
void chainStitch(char *psls, char *chains, char *outChainName, char *outFoundName, char *outNotFoundName)
/* chainStitch - Stitch psls into chains.
 *   psls            - psl file; alignments are grouped by
 *                     "<qName><strand><tName>".
 *   chains          - chain file; chains are grouped by the same key.
 *   outChainName    - receives every chain after stitching.
 *   outFoundName    - receives psls that were turned into new chains (and
 *                     is passed to checkInChains()/checkBeforeChains()).
 *   outNotFoundName - receives any psl still left on a group's list.
 * For each psl group, every psl is first offered to the chains built so far
 * for that group; a psl nobody took becomes a new chain.  The new chains are
 * appended to the group's existing chains, which are then sorted by
 * chainCmpTarget and passed through aggregateChains(). */
{
int lastChainId = -1;
struct psl *nextPsl;
struct psl *fakePslList;
int deletedBases, addedBases;
FILE *outFound = mustOpen(outFoundName, "w");
FILE *outNotFound = mustOpen(outNotFoundName, "w");
FILE *outChains = mustOpen(outChainName, "w");
struct lineFile *chainsLf = lineFileOpen(chains, TRUE);
struct cseqPair *cspList = NULL, *csp;
struct seqPair *spList = NULL, *sp;
struct lineFile *pslLf = pslFileOpen(psls);
struct dyString *dy = newDyString(512);
struct psl *psl;
struct hash *pslHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct hash *chainHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct chain *chain, *chainList = NULL;
struct cBlock *block;
int count;

/* Read the psls and group them. */
count = 0;
while ((psl = pslNext(pslLf)) != NULL)
    {
    assert((psl->strand[1] == 0) || (psl->strand[1] == '+'));
    count++;
    dyStringClear(dy);
    dyStringPrintf(dy, "%s%c%s", psl->qName, psl->strand[0], psl->tName);
    sp = hashFindVal(pslHash, dy->string);
    if (sp == NULL)
        {
        AllocVar(sp);
        slAddHead(&spList, sp);
        hashAddSaveName(pslHash, dy->string, sp, &sp->name);
        sp->qName = cloneString(psl->qName);
        sp->tName = cloneString(psl->tName);
        sp->qStrand = psl->strand[0];
        }

    slAddHead(&sp->psl, psl);
    }
lineFileClose(&pslLf);
printf("read in  %d psls\n",count);

/* slAddHead reversed each list; restore file order. */
for(sp = spList; sp; sp = sp->next)
    slReverse(&sp->psl);

/* Read the chains and group them; remember the largest id in use. */
count = 0;
while ((chain = chainRead(chainsLf)) != NULL)
    {
    if (chain->id > lastChainId)
        lastChainId = chain->id;
    dyStringClear(dy);
    dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName);
    csp = hashFindVal(chainHash, dy->string);
    if (csp == NULL)
        {
        AllocVar(csp);
        slAddHead(&cspList, csp);
        hashAddSaveName(chainHash, dy->string, csp, &csp->name);
        csp->qName = cloneString(chain->qName);
        csp->tName = cloneString(chain->tName);
        csp->qStrand = chain->qStrand;
        }
    slAddHead(&csp->chain, chain);
    count++;
    }
lineFileClose(&chainsLf);
printf("read in %d chains\n",count);

for(csp = cspList; csp; csp = csp->next)
    {
    slSort(&csp->chain, chainCmpTarget);
    }

addedBases = deletedBases = 0;
for(sp = spList; sp; sp = sp->next)
    {
    /* Find the chains that belong to this psl group.  This lookup used to
     * live only inside the disabled NOTNOW section, so csp kept whatever
     * value the previous iteration left behind and new chains could be
     * attached to a different query/strand/target group. */
    csp = hashFindVal(chainHash, sp->name);

#ifdef NOTNOW
    if (csp != NULL)
        {
        /* first check to see if psl blocks are in any chains */
        checkInChains(&sp->psl, &csp->chain, outFound, &addedBases);

        /* now extend chains to the right */
        checkAfterChains(&sp->psl, &csp->chain, outFound, &addedBases);

        /* now extend chains to the left */
        slReverse(&sp->psl);
        checkBeforeChains(&sp->psl, &csp->chain, outFound, &addedBases);
        }
#endif

    /* do we still have psl's */
    if (sp->psl != NULL)
        {
        /* make sure we have a chainList */
        chainList = NULL;
        if (csp == NULL)
            {
            AllocVar(csp);
            slAddHead(&cspList, csp);
            csp->qName = cloneString(sp->psl->qName);
            csp->tName = cloneString(sp->psl->tName);
            csp->qStrand = sp->psl->strand[0];
            dyStringClear(dy);
            dyStringPrintf(dy, "%s%c%s", csp->qName, csp->qStrand, csp->tName);
            hashAddSaveName(chainHash, dy->string, csp, &csp->name);
            }
        for(psl = sp->psl; psl ;  psl = nextPsl)
            {
            /* this psl will either fit a chain or make a new one */

            /* Detach the psl from the group's list so it can be offered
             * on its own as a one-element list. */
            nextPsl = psl->next;
            sp->psl  = nextPsl;

            psl->next = NULL;
            fakePslList = psl;
            if (chainList)
                checkInChains(&fakePslList, &chainList, outFound, &addedBases);
            if (fakePslList == NULL)
                continue;   /* taken by an existing chain */
            if (chainList)
                checkBeforeChains(&fakePslList, &chainList, outFound, &addedBases);
            if (fakePslList == NULL)
                continue;   /* taken by an existing chain */

            /* Nobody took it: start a new chain spanning the psl. */
            AllocVar(chain);
            chain->tStart = psl->tStarts[0];
            chain->qStart = psl->qStarts[0];
            chain->tEnd = psl->tStarts[psl->blockCount - 1] + psl->blockSizes[psl->blockCount - 1];
            chain->qEnd = psl->qStarts[psl->blockCount - 1] + psl->blockSizes[psl->blockCount - 1];
            chain->tSize = psl->tSize;
            chain->qSize = psl->qSize;
            chain->qName = cloneString(psl->qName);
            chain->tName = cloneString(psl->tName);
            chain->qStrand = psl->strand[0];
            chain->id = ++lastChainId;

            if (!addPslToChain(chain, psl, &addedBases))
                errAbort("new ");
            slAddHead(&chainList, chain);

            pslTabOut(psl, outFound);
            freez(&psl);
            }

        csp->chain = slCat(csp->chain, chainList);
        }

    /* csp is NULL only for a group with neither psls left nor chains. */
    if (csp != NULL)
        {
        slSort(&csp->chain, chainCmpTarget);
        csp->chain = aggregateChains(csp->chain);
        }
    }
fclose(outFound);
printf("deleted %d bases\n",deletedBases);
printf("added %d bases\n",addedBases);
count = 0;
for(sp = spList; sp; sp = sp->next)
    for(psl = sp->psl ; psl ; psl = psl->next)
        {
        pslTabOut(psl, outNotFound);
        count++;
        }
fclose(outNotFound);

printf("%d psls remain\n",count);

/* Recompute each chain's extent from its blocks, then write it. */
for(csp = cspList; csp; csp = csp->next)
    for(chain = csp->chain ; chain ; chain = chain->next)
        {
        chain->tStart = chain->blockList->tStart;
        chain->qStart = chain->blockList->qStart;

        /* After the loop tEnd/qEnd hold the last block's end. */
        for(block = chain->blockList; block; block = block->next)
            {
            chain->tEnd = block->tEnd;
            chain->qEnd = block->qEnd;
            }
        chainWrite(chain,  outChains);
        }
fclose(outChains);

dyStringFree(&dy);
}
Exemplo n.º 24
0
void chainNet(char *chainFile, char *tSizes, char *qSizes, 
	char *tNet, char *qNet)
/* chainNet - Make alignment nets out of chains.
 *   chainFile - input chains; must be sorted by descending score.  Reading
 *               stops at the first chain scoring below minScore.
 *   tSizes    - target sizes file, checked against each chain's tSize.
 *   qSizes    - query sizes file, checked against each chain's qSize.
 *   tNet      - output net file for the target side.
 *   qNet      - output net file for the query side.
 * Metadata from the chain file is forwarded to both outputs. */
{
struct lineFile *lf = lineFileOpen(chainFile, TRUE);
struct hash *qHash, *tHash;
struct chrom *qChromList, *tChromList, *tChrom, *qChrom;
struct chain *chain;
double lastScore = -1;
struct lm *lm = lmInit(0);
struct rbTreeNode **rbStack;
FILE *tNetFile = mustOpen(tNet, "w");
FILE *qNetFile = mustOpen(qNet, "w");


lmAllocArray(lm, rbStack, 256);
makeChroms(qSizes, lm, rbStack, &qHash, &qChromList);
makeChroms(tSizes, lm, rbStack, &tHash, &tChromList);
verbose(1, "Got %d chroms in %s, %d in %s\n", slCount(tChromList), tSizes,
       slCount(qChromList), qSizes);
lineFileSetMetaDataOutput(lf, tNetFile);
lineFileSetMetaDataOutput(lf, qNetFile);

/* Loop through chain file building up net. */
while ((chain = chainRead(lf)) != NULL)
    {
    /* Make sure that input is really sorted. */
    if (lastScore >= 0 && chain->score > lastScore)
        errAbort("%s must be sorted in order of score", chainFile);
    lastScore = chain->score;

    if (chain->score < minScore) 
        {
        break;
        }
    verbose(2, "chain %f (%d els) %s %d-%d %c %s %d-%d\n", 
            chain->score, slCount(chain->blockList), 
            chain->tName, chain->tStart, chain->tEnd, 
            chain->qStrand, chain->qName, chain->qStart, chain->qEnd);
    qChrom = hashMustFindVal(qHash, chain->qName);
    if (qChrom->size != chain->qSize)
        errAbort("%s is %d in %s but %d in %s", chain->qName, 
                chain->qSize, chainFile,
                qChrom->size, qSizes);
    tChrom = hashMustFindVal(tHash, chain->tName);
    if (tChrom->size != chain->tSize)
        errAbort("%s is %d in %s but %d in %s", chain->tName, 
                chain->tSize, chainFile,
                tChrom->size, tSizes);
    if (!inclQuery(chain))
        verbose(2, "skipping chain on query %s\n", chain->qName);
    else
        {
        addChain(qChrom, tChrom, chain);
        verbose(2, "%s has %d inserts, %s has %d\n", tChrom->name, 
                tChrom->spaces->n, qChrom->name, qChrom->spaces->n);
        }
    }
/* Build up other side of fills.  It's just for historical 
 * reasons this is not done during the main build up.   
 * It's a little less efficient this way, but to change it
 * some hard reverse strand issues would have to be juggled. */
verbose(1, "Finishing nets\n");
finishNet(qChromList, TRUE);
finishNet(tChromList, FALSE);

/* Write out basic net files. */
verbose(1, "writing %s\n", tNet);
outputNetSide(tChromList, tNetFile, FALSE);
verbose(1, "writing %s\n", qNet);
outputNetSide(qChromList, qNetFile, TRUE);

/* prevent SIGPIPE in preceding process if input is a pipe, consume remainder
 * of input file since we stop before EOF. */
if (isPipe(lf->fd))
    {
    char *line;
    while(lineFileNext(lf, &line, NULL))
        continue;
    }
lineFileClose(&lf);

/* Close the outputs only now: they are registered as metadata outputs of
 * lf, so they have to stay open until the reader is done.  They used to be
 * left open, which also hid write errors. */
carefulClose(&tNetFile);
carefulClose(&qNetFile);

if (verboseLevel() > 1)
    printMem(stderr);
}