Exemple #1
0
void hashFileList(char *fileList, struct hash *fileHash, struct hash *seqHash)
/* Read file list into hash */
{
if (twoBitIsFile(fileList))
    addTwoBit(fileList, fileHash, seqHash);
else if (endsWith(fileList, ".nib"))
    addNib(fileList, fileHash, seqHash);
else if (isFa(fileList))
    addFa(fileList, fileHash, seqHash);
else
    {
    struct lineFile *lf = lineFileOpen(fileList, TRUE);
    char *row[1];
    while (lineFileRow(lf, row))
        {
	char *file = row[0];
	if (twoBitIsFile(file))
	    addTwoBit(file, fileHash, seqHash);
	else if (endsWith(file, ".nib"))
	    addNib(file, fileHash, seqHash);
	else
	    addFa(file, fileHash, seqHash);
	}
    lineFileClose(&lf);
    }
}
Exemple #2
0
struct nibTwoCache *nibTwoCacheNew(char *pathName)
/* Get something that will more or less transparently get sequence from 
 * nib files or .2bit. */ 
{
struct nibTwoCache *ntc;
AllocVar(ntc);
ntc->pathName = cloneString(pathName);
ntc->isTwoBit = twoBitIsFile(pathName);
if (ntc->isTwoBit)
    ntc->tbf = twoBitOpen(pathName);
else
    ntc->nibHash = newHash(10);
return ntc;
}
Exemple #3
0
struct dnaSeq *nibTwoLoadOne(char *pathName, char *seqName)
/* Return sequence from a directory full of nibs or a .2bit file. 
 * The sequence will have repeats in lower case. */
{
struct dnaSeq *seq;
if (twoBitIsFile(pathName))
    {
    struct twoBitFile *tbf = twoBitOpen(pathName);
    seq = twoBitReadSeqFrag(tbf, seqName, 0, 0);
    twoBitClose(&tbf);
    }
else
    {
    char path[512];
    safef(path, sizeof(path), "%s/%s.nib", pathName, seqName);
    seq = nibLoadAllMasked(NIB_MASK_MIXED, path);
    }
return seq;
}
void twoBitMask(char *inName, char *maskName, char *outName)
/* twoBitMask - apply masking to a .2bit file, creating a new .2bit file. */
{
    struct hash *tbHash = hashNew(20);
    struct hash *bitmapHash = hashNew(20);
    struct twoBit *twoBitList = NULL;
    struct twoBit *twoBit = NULL;
    FILE *f = NULL;

    if (! twoBitIsFile(inName))
    {
        if (twoBitIsSpec(inName))
            errAbort("Sorry, this works only on whole .2bit files, not specs.");
        else
            errAbort("Input %s does not look like a proper .2bit file.", inName);
    }

    twoBitList = slurpInput(inName, tbHash, bitmapHash);

    /* Read mask data into bitmapHash, store it in twoBits: */
    if ((type && endsWith(type, "bed")) || endsWith(maskName, ".bed"))
        maskWithBed(maskName, tbHash, bitmapHash);
    else if ((type && endsWith(type, "out")) || endsWith(maskName, ".out"))
        maskWithOut(maskName, tbHash, bitmapHash);
    else
        errAbort("Sorry, maskFile must end in \".bed\" or \".out\".");

    /* Create a new .2bit file, write it out from twoBits. */
    f = mustOpen(outName, "wb");
    twoBitWriteHeader(twoBitList, f);
    for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next)
    {
        twoBitWriteOne(twoBit, f);
    }
    carefulClose(&f);

    /* Don't bother freeing twoBitList and hashes here -- just exit. */
}
Exemple #5
0
struct dnaLoadStack *dnaLoadStackNew(char *fileName)
/* Create new dnaLoadStack on composite file. */
{
struct dnaLoadStack *dls;
AllocVar(dls);
if (twoBitIsFile(fileName))
    {
    dls->twoBit = twoBitOpen(fileName);
    dls->tbi = dls->twoBit->indexList;
    }
else
    {
    char *line;
    dls->textFile = lineFileOpen(fileName, TRUE);
    if (lineFileNextReal(dls->textFile, &line))
        {
	line = trimSpaces(line);
	if (line[0] == '>')
	    dls->textIsFa = TRUE;
	lineFileReuse(dls->textFile);
	}
    }
return dls;
}
void axtChain(char *axtIn, char *tNibDir, char *qNibDir, char *chainOut)
/* axtChain - Chain together axt alignments.. */
{
struct hash *pairHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct seqPair *spList = NULL, *sp;
FILE *f = mustOpen(chainOut, "w");
char *qName = "",  *tName = "";
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char qStrand = 0, tStrand = 0;
struct chain *chainList = NULL, *chain;
FILE *details = NULL;
struct dnaSeq *seq, *seqList = NULL;
struct hash *faHash = newHash(0);
struct hash *tFaHash = newHash(0);
FILE *faF;
boolean qIsTwoBit = twoBitIsFile(qNibDir);
boolean tIsTwoBit = twoBitIsFile(tNibDir);

axtScoreSchemeDnaWrite(scoreScheme, f, "axtChain");

if (detailsName != NULL)
    details = mustOpen(detailsName, "w");
/* Read input file and divide alignments into various parts. */
if (optionExists("psl"))
    spList = readPslBlocks(axtIn, pairHash, f);
else
    spList = readAxtBlocks(axtIn, pairHash, f);

if (optionExists("faQ"))
    {
    faF = mustOpen(qNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(faHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
if (optionExists("faT"))
    {
    faF = mustOpen(tNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(tFaHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
for (sp = spList; sp != NULL; sp = sp->next)
    {
    slReverse(&sp->blockList);
    removeExactOverlaps(&sp->blockList);
    verbose(1, "%d blocks after duplicate removal\n", slCount(sp->blockList));
    if (optionExists("faQ"))
        {
        assert (faHash != NULL);
        loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
        }
    else
	{
        loadIfNewSeq(qNibDir, qIsTwoBit, sp->qName, sp->qStrand, 
		&qName, &qSeq, &qStrand);
        }
    if (optionExists("faT"))
        {
        assert (tFaHash != NULL);
        loadFaSeq(tFaHash, sp->tName, '+', &tName, &tSeq, &tStrand);
        }
    else 
	{
        loadIfNewSeq(tNibDir, tIsTwoBit, sp->tName, '+', 
		&tName, &tSeq, &tStrand);
	}
    chainPair(sp, qSeq, tSeq, &chainList, details);
    }
slSort(&chainList, chainCmpScore);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    assert(chain->qStart == chain->blockList->qStart 
	&& chain->tStart == chain->blockList->tStart);
    chainWrite(chain, f);
    }

carefulClose(&f);
}
Exemple #7
0
void outputBlocks(struct lineFile *lf,
	struct block *blockList, int score, FILE *f, boolean isRc, 
	char *qName, int qSize, char *qNibDir, struct dlList *qCache,
	char *tName, int tSize, char *tNibDir, struct dlList *tCache,
	boolean rescore)
/* Output block list as an axt to file f. */
{
int qStart = BIGNUM, qEnd = 0, tStart = BIGNUM, tEnd = 0;
struct block *lastBlock = NULL;
struct block *block;
struct dyString *qSym = newDyString(16*1024);
struct dyString *tSym = newDyString(16*1024);
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seq = NULL;
struct axt axt;
boolean qIsTwoBit = twoBitIsFile(qNibDir);
boolean tIsTwoBit = twoBitIsFile(tNibDir);

if (blockList == NULL)
    return;

/* Figure overall dimensions. */
for (block = blockList; block != NULL; block = block->next)
    {
    if (qStart > block->qStart) qStart = block->qStart;
    if (qEnd < block->qEnd) qEnd = block->qEnd;
    if (tStart > block->tStart) tStart = block->tStart;
    if (tEnd < block->tEnd) tEnd = block->tEnd;
    }

/* Load sequence covering alignment from nib files. */
if (isRc)
    {
    reverseIntRange(&qStart, &qEnd, qSize);
    if (qIsFa)
        {
        for (seq = qFaList ; seq != NULL ; seq = seq->next)
            if (sameString(qName, seq->name))
                break;
        if (seq != NULL)
            {
            AllocVar(qSeq);
            qSeq->size = qEnd - qStart;
            qSeq->name = cloneString(qName);
            qSeq->dna = cloneMem((seq->dna)+qStart, qSeq->size);
            }
        else
            errAbort("sequence not found %s\n",qName);
        }
    else
        qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit);
    reverseIntRange(&qStart, &qEnd, qSize);
    reverseComplement(qSeq->dna, qSeq->size);
    }
else
    {    
    if (qIsFa)
        {
        for (seq = qFaList ; seq != NULL ; seq = seq->next)
	    {
            if (sameString(qName, seq->name))
                break;
	    }
	if (seq != NULL)
	    {
	    AllocVar(qSeq);
	    qSeq->size = qEnd - qStart;
	    qSeq->name = cloneString(qName);
	    qSeq->dna = (seq->dna)+qStart;
	    }
	else
	    errAbort("sequence not found %s\n",qName);
        }
    else
        qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit);
    }
    if (tIsFa)
        {
        for (seq = tFaList ; seq != NULL ; seq = seq->next)
            if (sameString(tName, seq->name))
                break;
        if (seq != NULL)
            {
            AllocVar(tSeq);
            tSeq->size = tEnd - tStart;
            tSeq->name = cloneString(tName);
            tSeq->dna = cloneMem((seq->dna)+tStart, tSeq->size);
            }
        else
            errAbort("sequence not found %s\n",tName);
        }
    else
        tSeq = readFromCache(tCache, tNibDir, tName, tStart, tEnd - tStart, tSize, tIsTwoBit);

/* Loop through blocks copying sequence into dynamic strings. */
for (block = blockList; block != NULL; block = block->next)
    {
    if (lastBlock != NULL)
        {
	int qGap = block->qStart - lastBlock->qEnd;
	int tGap = block->tStart - lastBlock->tEnd;
	if (qGap != 0 && tGap != 0)
	    {
	    errAbort("Gaps in both strand on alignment ending line %d of %s",
	    	lf->lineIx, lf->fileName);
	    }
	if (qGap > 0)
	    {
	    dyStringAppendMultiC(tSym, '-', qGap);
	    dyStringAppendN(qSym, qSeq->dna + lastBlock->qEnd - qStart, qGap);
	    }
	if (tGap > 0)
	    {
	    dyStringAppendMultiC(qSym, '-', tGap);
	    dyStringAppendN(tSym, tSeq->dna + lastBlock->tEnd - tStart, tGap);
	    }
	}
    if (qSeq->size < block->qStart - qStart)
        {
        errAbort("read past end of sequence %s size =%d block->qStart-qstart=%d block->qStart=%d qEnd=%d \n", qName, qSeq->size, block->qStart-qStart,block->qStart, block->qEnd );
        }
    dyStringAppendN(qSym, qSeq->dna + block->qStart - qStart,
    	block->qEnd - block->qStart);
    if (tSeq->size < block->tStart - tStart)
        {
        errAbort("read past end of sequence %s size =%d block->tStart-tstart=%d\n", tName, tSeq->size, block->tStart-tStart);
        }
    dyStringAppendN(tSym, tSeq->dna + block->tStart - tStart,
    	block->tEnd - block->tStart);
    lastBlock = block;
    }
if (qSym->stringSize != tSym->stringSize)
    errAbort("qSize and tSize don't agree in alignment ending line %d of %s",
	    lf->lineIx, lf->fileName);

if (rescore)
    score = axtScoreSym(scoreScheme, qSym->stringSize,
			qSym->string, tSym->string);

/* Fill in an axt and write it to output. */
ZeroVar(&axt);
axt.qName = qName;
axt.qStart = qStart;
axt.qEnd = qEnd;
axt.qStrand = (isRc ? '-' : '+');
axt.tName = tName;
axt.tStart = tStart;
axt.tEnd = tEnd;
axt.tStrand = '+';
axt.score = score;
axt.symCount = qSym->stringSize;
axt.qSym = qSym->string;
axt.tSym = tSym->string;
axtWrite(&axt, f);

/* Clean up. */
if (!qIsFa)
    freeDnaSeq(&qSeq);
freeDnaSeq(&tSeq);
dyStringFree(&qSym);
dyStringFree(&tSym);
}