示例#1
0
static void axtQueryOut(struct gfOutput *out, FILE *f)
/* End-of-query output in axt format: write every axt of every bundle
 * hanging off out->data to f, then hand the bundle list to
 * axtBundleFreeList. */
{
    struct axtData *data = out->data;
    struct axtBundle *bundle = data->bundleList;

    while (bundle != NULL)
    {
        struct axt *cur = bundle->axtList;
        while (cur != NULL)
        {
            axtWrite(cur, f);
            cur = cur->next;
        }
        bundle = bundle->next;
    }
    axtBundleFreeList(&data->bundleList);
}
void writeAxtFromChain(struct chain *chain, struct dnaSeq *qSeq, int qOffset,
	struct dnaSeq *tSeq, int tOffset, FILE *f, FILE *gapFile)
/* Convert chain into a list of axt's with chainToAxt (using the
 * file-level maxGap setting) and write each one to f.  When gapFile is
 * non-NULL, writeGaps is first called on the chain with that file.
 * The axt list is freed before returning. */
{
struct axt *axtList = NULL;
struct axt *el = NULL;

if (gapFile != NULL)
    {
    writeGaps(chain, gapFile);
    }

axtList = chainToAxt(chain, qSeq, qOffset, tSeq, tOffset, maxGap, BIGNUM);
verbose(9, "%d axts\n", slCount(axtList));

el = axtList;
while (el != NULL)
    {
    axtWrite(el, f);
    el = el->next;
    }
axtFreeList(&axtList);
}
void outputSubAxt(struct axt *axt, int start, int size, int score, FILE *f)
/* Output subset of axt to axt file. */
{
struct axt a;
a = *axt;
a.symCount = size;
a.score = score;
a.qStart += countNonDash(a.qSym, start);
a.qEnd = a.qStart + countNonDash(a.qSym + start, size);
a.tStart += countNonDash(a.tSym, start);
a.tEnd = a.tStart + countNonDash(a.tSym + start, size);
a.qSym += start;
a.tSym += start;
axtWrite(&a, f);
}
示例#4
0
void axtSplitByTarget(char *inName, char *outDir)
/* axtSplitByTarget - Split a single axt file into one file per target.
 * Each record read from inName is written to the file that getSplitFile
 * returns for its target name and start, and the file-level
 * totalWritten counter grows by a per-record size figure.
 * NOTE(review): neither lf nor the files reached through outHash are
 * closed in this function. */
{
struct hash *outHash = newHash(8);  /* FILE valued hash */
struct lineFile *lf = lineFileOpen(inName, TRUE);
struct axt *axt = NULL;

makeDir(outDir);
for (;;)
    {
    FILE *f;

    axt = axtRead(lf);
    if (axt == NULL)
        break;
    f = getSplitFile(outHash, outDir, axt->tName, axt->tStart);
    axtWrite(axt, f);
    /* Both names, both symbol strings, plus a fixed 40 per record. */
    totalWritten += strlen(axt->tName) + strlen(axt->qName) + 40
        + strlen(axt->qSym) + strlen(axt->tSym);
    axtFree(&axt);
    }
}
示例#5
0
void axtDropSelf(char *inFile, char *outFile)
/* axtDropSelf - Copy inFile to outFile, leaving out records whose query
 * and target have the same name, strand, start and end.
 * NOTE(review): f and lf are not closed in this function. */
{
FILE *f = mustOpen(outFile, "w");
struct lineFile *lf = lineFileOpen(inFile, TRUE);
struct axt *axt;

for (axt = axtRead(lf); axt != NULL; axt = axtRead(lf))
    {
    /* A self alignment matches on every coordinate, strand and name. */
    int isSelf = (axt->qStart == axt->tStart
               && axt->qEnd == axt->tEnd
               && axt->qStrand == axt->tStrand
               && sameString(axt->qName, axt->tName));
    if (!isSelf)
        axtWrite(axt, f);
    axtFree(&axt);
    }
}
void axtSwapFile(char *source, char *targetSizes, char *querySizes, char *dest)
/* axtSwapFile - Read each axt from source, pass it to axtSwap together
 * with the sizes looked up for its target and query names, and write
 * the result to dest.
 * NOTE(review): lf, f and the two size hashes are not released here. */
{
    struct hash *tHash = loadIntHash(targetSizes);
    struct hash *qHash = loadIntHash(querySizes);
    struct lineFile *lf = lineFileOpen(source, TRUE);
    FILE *f = mustOpen(dest, "w");
    struct axt *axt = NULL;

    for (;;)
    {
        int tSize, qSize;

        axt = axtRead(lf);
        if (axt == NULL)
            break;
        tSize = hashIntVal(tHash, axt->tName);
        qSize = hashIntVal(qHash, axt->qName);
        axtSwap(axt, tSize, qSize);
        axtWrite(axt, f);
        axtFree(&axt);
    }
}
void axtRescore(char *in, char *out)
/* axtRescore - Copy axt file 'in' to 'out', replacing each record's
 * score with the value axtScore returns under the file-level
 * scoreScheme.  The scheme is written at the top of the output via
 * axtScoreSchemeDnaWrite.
 * NOTE(review): lf and f are not closed in this function. */
{
struct lineFile *lf = lineFileOpen(in, TRUE);
FILE *f = mustOpen(out, "w");
struct axt *rec;

lineFileSetMetaDataOutput(lf, f);
axtScoreSchemeDnaWrite(scoreScheme, f, "axtRescore");
while ((rec = axtRead(lf)) != NULL)
    {
    rec->score = axtScore(rec, scoreScheme);
    axtWrite(rec, f);
    axtFree(&rec);
    }
}
void axtDropOverlap(char *inName, char *tSizeFile, char *qSizeFile, char *outName)
/* Used for cleaning up self alignments - copies inName to outName,
 * dropping every record whose query and target share a name and whose
 * query range (converted with reverseIntRange when the query strand is
 * '-') equals the target range.
 *
 * tSizeFile is accepted but not read by this function.
 * NOTE(review): qSizeHash is not freed here. */
{
struct hash *qSizeHash = readSizes(qSizeFile);
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
struct axt *axt;

while ((axt = axtRead(lf)) != NULL)
    {
    int isSelfOverlap = 0;

    if (sameString(axt->qName, axt->tName))
        {
        int qs = axt->qStart;
        int qe = axt->qEnd;
        if (axt->qStrand == '-')
            reverseIntRange(&qs, &qe, findSize(qSizeHash, axt->qName));
        isSelfOverlap = (axt->tStart == qs && axt->tEnd == qe);
        }
    if (!isSelfOverlap)
        axtWrite(axt, f);

    /* Free on every path; skipped records used to bypass this and leak. */
    axtFree(&axt);
    }
fclose(f);
lineFileClose(&lf);
}
static void doAChain(struct chain *chain, struct nibTwoCache *tSeqCache, struct nibTwoCache *qSeqCache,
                     FILE *f)
/* Convert one chain to axt's and write those whose axtIdRatio is at
 * least the file-level minIdRatio.  Output goes through bedWriteAxt
 * when the file-level bedOut flag is set, otherwise through axtWrite.
 * The loaded sequences and the axt list are freed before returning. */
{
struct dnaSeq *qSeq = loadSeqStrand(qSeqCache, chain->qName, chain->qStart, chain->qEnd, chain->qStrand);
struct dnaSeq *tSeq = loadSeqStrand(tSeqCache, chain->tName, chain->tStart, chain->tEnd, '+');
struct axt *axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM);
struct axt *el = axtList;

while (el != NULL)
    {
    double idRatio = axtIdRatio(el);
    if (idRatio >= minIdRatio)
        {
        if (!bedOut)
            axtWrite(el, f);
        else
            bedWriteAxt(el, chain->qSize, chain->tSize, idRatio, f);
        }
    el = el->next;
    }

axtFreeList(&axtList);
freeDnaSeq(&qSeq);
freeDnaSeq(&tSeq);
}
void liftAxt(char *destFile, struct hash *liftHash, 
	int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .axt files.  Every record of each of the
 * sourceCount files named in sources is looked up with findLift (by
 * query name when querySide is non-zero, otherwise by target name),
 * shifted by the matching spec and written to destFile.  A record with
 * no spec is dropped unless the file-level 'how' equals carryMissing,
 * in which case it is written unchanged.
 * NOTE(review): the FILE opened on destFile is not closed here. */
{
FILE *f = mustOpen(destFile, "w");
int dotMod = dots;
int i;

for (i = 0; i < sourceCount; ++i)
    {
    char *source = sources[i];
    struct lineFile *lf = lineFileOpen(source, TRUE);
    struct axt *axt;

    lineFileSetMetaDataOutput(lf, f);
    verbose(1, "Lifting %s\n", source);
    for (;;)
        {
        struct axt lifted;
        struct liftSpec *spec;
        char *seqName;

        axt = axtRead(lf);
        if (axt == NULL)
            break;

        /* Edit a shallow copy so the record handed to axtFree keeps
         * its own name pointers. */
        lifted = *axt;
        seqName = (querySide ? lifted.qName : lifted.tName);
        spec = findLift(liftHash, seqName, lf);

        if (spec == NULL && how != carryMissing)
            {
            axtFree(&axt);
            continue;
            }

        if (spec != NULL)
            {
            char strand = (querySide ? lifted.qStrand : lifted.tStrand);
            int offset;

            cantHandleSpecRevStrand(spec);
            if (strand != '-')
                offset = spec->offset;
            else
                {
                /* Minus strand: measure the offset from the far end
                 * of the new sequence. */
                int ctgEnd = spec->offset + spec->oldSize;
                offset = spec->newSize - ctgEnd;
                }

            if (!querySide)
                {
                lifted.tStart += offset;
                lifted.tEnd += offset;
                lifted.tName = spec->newName;
                if (strand == '-')
                    warn("Target minus strand, please double check results.");
                }
            else
                {
                lifted.qStart += offset;
                lifted.qEnd += offset;
                lifted.qName = spec->newName;
                }
            }

        axtWrite(&lifted, f);
        axtFree(&axt);
        doDots(&dotMod);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
}
示例#11
0
void outputBlocks(struct lineFile *lf,
	struct block *blockList, int score, FILE *f, boolean isRc, 
	char *qName, int qSize, char *qNibDir, struct dlList *qCache,
	char *tName, int tSize, char *tNibDir, struct dlList *tCache,
	boolean rescore)
/* Output block list as an axt to file f. */
{
int qStart = BIGNUM, qEnd = 0, tStart = BIGNUM, tEnd = 0;
struct block *lastBlock = NULL;
struct block *block;
struct dyString *qSym = newDyString(16*1024);
struct dyString *tSym = newDyString(16*1024);
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seq = NULL;
struct axt axt;
boolean qIsTwoBit = twoBitIsFile(qNibDir);
boolean tIsTwoBit = twoBitIsFile(tNibDir);

if (blockList == NULL)
    return;

/* Figure overall dimensions. */
for (block = blockList; block != NULL; block = block->next)
    {
    if (qStart > block->qStart) qStart = block->qStart;
    if (qEnd < block->qEnd) qEnd = block->qEnd;
    if (tStart > block->tStart) tStart = block->tStart;
    if (tEnd < block->tEnd) tEnd = block->tEnd;
    }

/* Load sequence covering alignment from nib files. */
if (isRc)
    {
    reverseIntRange(&qStart, &qEnd, qSize);
    if (qIsFa)
        {
        for (seq = qFaList ; seq != NULL ; seq = seq->next)
            if (sameString(qName, seq->name))
                break;
        if (seq != NULL)
            {
            AllocVar(qSeq);
            qSeq->size = qEnd - qStart;
            qSeq->name = cloneString(qName);
            qSeq->dna = cloneMem((seq->dna)+qStart, qSeq->size);
            }
        else
            errAbort("sequence not found %s\n",qName);
        }
    else
        qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit);
    reverseIntRange(&qStart, &qEnd, qSize);
    reverseComplement(qSeq->dna, qSeq->size);
    }
else
    {    
    if (qIsFa)
        {
        for (seq = qFaList ; seq != NULL ; seq = seq->next)
	    {
            if (sameString(qName, seq->name))
                break;
	    }
	if (seq != NULL)
	    {
	    AllocVar(qSeq);
	    qSeq->size = qEnd - qStart;
	    qSeq->name = cloneString(qName);
	    qSeq->dna = (seq->dna)+qStart;
	    }
	else
	    errAbort("sequence not found %s\n",qName);
        }
    else
        qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit);
    }
    if (tIsFa)
        {
        for (seq = tFaList ; seq != NULL ; seq = seq->next)
            if (sameString(tName, seq->name))
                break;
        if (seq != NULL)
            {
            AllocVar(tSeq);
            tSeq->size = tEnd - tStart;
            tSeq->name = cloneString(tName);
            tSeq->dna = cloneMem((seq->dna)+tStart, tSeq->size);
            }
        else
            errAbort("sequence not found %s\n",tName);
        }
    else
        tSeq = readFromCache(tCache, tNibDir, tName, tStart, tEnd - tStart, tSize, tIsTwoBit);

/* Loop through blocks copying sequence into dynamic strings. */
for (block = blockList; block != NULL; block = block->next)
    {
    if (lastBlock != NULL)
        {
	int qGap = block->qStart - lastBlock->qEnd;
	int tGap = block->tStart - lastBlock->tEnd;
	if (qGap != 0 && tGap != 0)
	    {
	    errAbort("Gaps in both strand on alignment ending line %d of %s",
	    	lf->lineIx, lf->fileName);
	    }
	if (qGap > 0)
	    {
	    dyStringAppendMultiC(tSym, '-', qGap);
	    dyStringAppendN(qSym, qSeq->dna + lastBlock->qEnd - qStart, qGap);
	    }
	if (tGap > 0)
	    {
	    dyStringAppendMultiC(qSym, '-', tGap);
	    dyStringAppendN(tSym, tSeq->dna + lastBlock->tEnd - tStart, tGap);
	    }
	}
    if (qSeq->size < block->qStart - qStart)
        {
        errAbort("read past end of sequence %s size =%d block->qStart-qstart=%d block->qStart=%d qEnd=%d \n", qName, qSeq->size, block->qStart-qStart,block->qStart, block->qEnd );
        }
    dyStringAppendN(qSym, qSeq->dna + block->qStart - qStart,
    	block->qEnd - block->qStart);
    if (tSeq->size < block->tStart - tStart)
        {
        errAbort("read past end of sequence %s size =%d block->tStart-tstart=%d\n", tName, tSeq->size, block->tStart-tStart);
        }
    dyStringAppendN(tSym, tSeq->dna + block->tStart - tStart,
    	block->tEnd - block->tStart);
    lastBlock = block;
    }
if (qSym->stringSize != tSym->stringSize)
    errAbort("qSize and tSize don't agree in alignment ending line %d of %s",
	    lf->lineIx, lf->fileName);

if (rescore)
    score = axtScoreSym(scoreScheme, qSym->stringSize,
			qSym->string, tSym->string);

/* Fill in an axt and write it to output. */
ZeroVar(&axt);
axt.qName = qName;
axt.qStart = qStart;
axt.qEnd = qEnd;
axt.qStrand = (isRc ? '-' : '+');
axt.tName = tName;
axt.tStart = tStart;
axt.tEnd = tEnd;
axt.tStrand = '+';
axt.score = score;
axt.symCount = qSym->stringSize;
axt.qSym = qSym->string;
axt.tSym = tSym->string;
axtWrite(&axt, f);

/* Clean up. */
if (!qIsFa)
    freeDnaSeq(&qSeq);
freeDnaSeq(&tSeq);
dyStringFree(&qSym);
dyStringFree(&tSym);
}