Beispiel #1
0
static struct ffAli *ffNextBreak(struct ffAli *ff, int maxInsert,
                                 bioSeq *tSeq, struct trans3 *t3List)
/* Walk rightwards from ff and return the first block that is separated
 * from its left neighbour by a break.  A break is any of:
 *   - a gap on both the h and n sides at once,
 *   - a negative gap (overlap) on either side,
 *   - a gap on either side longer than maxInsert.
 * Returns NULL if the end of the list is reached without a break.
 * ff itself must not be NULL. */
{
    struct ffAli *cur = ff;
    struct ffAli *next = cur->right;

    while (next != NULL)
    {
        /* h-side coordinates are mapped through trans3GenoPos; the n-side
         * gap is a plain pointer difference. */
        int nextStart = trans3GenoPos(next->hStart, tSeq, t3List, FALSE);
        int curEnd = trans3GenoPos(cur->hEnd, tSeq, t3List, TRUE);
        int hayGap = nextStart - curEnd;
        int needleGap = next->nStart - cur->nEnd;

        int gapInBoth = (hayGap != 0 && needleGap != 0);
        int overlapping = (hayGap < 0 || needleGap < 0);
        int tooLong = (hayGap > maxInsert || needleGap > maxInsert);

        if (gapInBoth || overlapping || tooLong)
            break;

        cur = next;
        next = next->right;
    }
    return next;
}
Beispiel #2
0
struct ffAli *cutAtBigIntrons(struct ffAli *ffList, int maxIntron,
	int *pScore, enum ffStringency stringency,
	boolean isProt, bioSeq *tSeq, struct trans3 *t3List,
	struct ffAli **returnLeftovers)
/* Truncate ffList at the first h-side gap between neighbouring blocks
 * that is larger than maxIntron.  The blocks to the right of the cut are
 * detached and handed to ffCat together with returnLeftovers, and *pScore
 * is recomputed for the shortened list.  When no gap is too big, neither
 * the list nor *pScore is touched.  Always returns ffList, which must not
 * be NULL. */
{
struct ffAli *left = ffList;
struct ffAli *block = left->right;
struct ffAli *rest;

/* Find the first neighbouring pair whose h-side distance is too large. */
while (block != NULL)
    {
    int blockStart = trans3GenoPos(block->hStart, tSeq, t3List, FALSE);
    int leftEnd = trans3GenoPos(left->hEnd, tSeq, t3List, TRUE);
    if (blockStart - leftEnd > maxIntron)
        break;
    left = block;
    block = block->right;
    }

/* Ran off the end: nothing to cut. */
if (block == NULL)
    return ffList;

/* Unlink everything right of 'left' in both directions. */
rest = left->right;
left->right = NULL;
rest->left = NULL;
ffCat(returnLeftovers, &rest);

/* The kept part is shorter now, so its score must be refreshed. */
if (isProt)
    {
    *pScore = ffScoreProtein(ffList, stringency);
    }
else
    {
    *pScore = ffScore(ffList, stringency);
    }
return ffList;
}
Beispiel #3
0
static int scoreAli(struct ffAli *ali, boolean isProt,
	enum ffStringency stringency,
	struct dnaSeq *tSeq, struct trans3 *t3List)
/* Return the total score of an alignment: the sum of per-block match
 * scores (aaScoreMatch when isProt, dnaScoreMatch otherwise) minus
 * ffCalcGapPenalty for the gap between each pair of adjacent blocks. */
{
int total = 0;
struct ffAli *block = ali;

while (block != NULL)
    {
    struct ffAli *next = block->right;
    int blockSize = block->nEnd - block->nStart;

    if (isProt)
        total += aaScoreMatch(block->nStart, block->hStart, blockSize);
    else
        total += dnaScoreMatch(block->nStart, block->hStart, blockSize);

    /* Charge for the gap leading to the next block, if there is one. */
    if (next != NULL)
        {
        int nextStart = trans3GenoPos(next->hStart, tSeq, t3List, FALSE);
        int thisEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE);
        int hayGap = nextStart - thisEnd;
        int needleGap = next->nStart - block->nEnd;
        total -= ffCalcGapPenalty(hayGap, needleGap, stringency);
        }
    block = next;
    }
return total;
}
Beispiel #4
0
void dumpFf(struct ffAli *left, bioSeq *qSeq, bioSeq *tSeq, struct trans3 *t3List)
/* Print every block of the list starting at left to stdout on one line.
 * Each block is shown as "(hStart - hEnd)[nStart-nEnd] ", where the h
 * coordinates come from trans3GenoPos and the n coordinates are offsets
 * from qSeq->dna.  The line is finished with a newline. */
{
struct ffAli *block = left;

while (block != NULL)
    {
    int hayStart = trans3GenoPos(block->hStart, tSeq, t3List, FALSE);
    int hayEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE);
    long needleStart = (long)(block->nStart - qSeq->dna);
    long needleEnd = (long)(block->nEnd - qSeq->dna);

    printf("(%d - %d)[%ld-%ld] ", hayStart, hayEnd, needleStart, needleEnd);
    block = block->right;
    }
printf("\n");
}
int trans3Frame(char *pt, struct trans3 *t3List)
/* Return the frame pt lies in: 0 when t3List is NULL, otherwise one plus
 * the position reported by trans3GenoPos taken modulo 3. */
{
int genoPos;

if (t3List == NULL)
    return 0;

genoPos = trans3GenoPos(pt, NULL, t3List, FALSE);
return genoPos % 3 + 1;
}
Beispiel #6
0
static void savePslx(char *chromName, int chromSize, int chromOffset,
                     struct ffAli *ali, struct dnaSeq *tSeq, struct dnaSeq *qSeq,
                     boolean isRc, enum ffStringency stringency, int minMatch, FILE *f,
                     struct hash *t3Hash, boolean reportTargetStrand, boolean targetIsRc,
                     struct hash *maskHash, int minIdentity,
                     boolean qIsProt, boolean tIsProt, boolean saveSeq)
/* Gather statistics for one alignment and, if its identity figure reaches
 * minIdentity, write it to f as a single tab-separated psl line.  When
 * saveSeq is TRUE two extra columns holding the raw block sequences are
 * appended (pslX).
 *
 * maskHash, if given, supplies a bit mask for tSeq->name; matching bases
 * whose mask bit is set are counted as repeat matches.  t3Hash, if given,
 * supplies the trans3 list for tSeq->name used to map h-side positions.
 * Both lookups use hashMustFindVal.  Aborts through errAbort on a write
 * error.  minMatch, qIsProt and tIsProt are not used in this function. */
{
    struct ffAli *lastBlock = ffRightmost(ali);
    struct ffAli *block;
    DNA *qBase = qSeq->dna;
    DNA *tBase = tSeq->dna;
    int qStart = ali->nStart - qBase;
    int qEnd = lastBlock->nEnd - qBase;
    int tStart, tEnd;
    int matches = 0, mismatches = 0, repMatches = 0, nCount = 0;
    int qInserts = 0, qInsertBases = 0;
    int tInserts = 0, tInsertBases = 0;
    int gapCount, identity;
    struct trans3 *t3List = NULL;
    Bits *maskBits = NULL;

    if (maskHash != NULL)
        maskBits = hashMustFindVal(maskHash, tSeq->name);
    if (t3Hash != NULL)
        t3List = hashMustFindVal(t3Hash, tSeq->name);
    tStart = trans3GenoPos(ali->hStart, tSeq, t3List, FALSE) + chromOffset;
    tEnd = trans3GenoPos(lastBlock->hEnd, tSeq, t3List, TRUE) + chromOffset;

    /* Pass 1: classify every aligned position and tally the gaps. */
    block = ali;
    while (block != NULL)
    {
        struct ffAli *next = block->right;
        DNA *qp = block->nStart;
        DNA *tp = block->hStart;
        int remaining = block->nEnd - block->nStart;

        for (; remaining > 0; --remaining, ++qp, ++tp)
        {
            DNA qc = *qp;
            DNA tc = *tp;

            if (qc == 'n' || tc == 'n')
                ++nCount;
            else if (qc != tc)
                ++mismatches;
            else if (maskBits == NULL)
                ++matches;
            else
            {
                /* A match under a set mask bit counts as a repeat match. */
                int maskIx = tp - tBase;
                if (bitReadOne(maskBits, maskIx))
                    ++repMatches;
                else
                    ++matches;
            }
        }

        if (next != NULL)
        {
            int nextStart = trans3GenoPos(next->hStart, tSeq, t3List, FALSE) + chromOffset;
            int thisEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE) + chromOffset;
            int tGap = nextStart - thisEnd;
            int qGap = next->nStart - block->nEnd;

            if (qGap != 0)
            {
                qInserts += 1;
                qInsertBases += qGap;
            }
            if (tGap != 0)
            {
                tInserts += 1;
                tInsertBases += tGap;
            }
        }
        block = next;
    }

    /* Identity figure: h-side inserts are not penalised for ffCdna.
     * (The score >= minMatch test is done by a higher level.) */
    gapCount = qInserts + (stringency == ffCdna ? 0 : tInserts);
    identity = roundingScale(1000, matches + repMatches - 2*gapCount,
                             matches + repMatches + mismatches);
    if (identity < minIdentity)
        return;

    /* Flip coordinates to the other strand where requested. */
    if (isRc)
    {
        int qSize = qSeq->size;
        int flippedStart = qSize - qEnd;
        qEnd = qSize - qStart;
        qStart = flippedStart;
    }
    if (targetIsRc)
    {
        int flippedStart = chromSize - tEnd;
        tEnd = chromSize - tStart;
        tStart = flippedStart;
    }

    /* Pass 2: emit the line. */
    fprintf(f, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c",
            matches, mismatches, repMatches, nCount,
            qInserts, qInsertBases, tInserts, tInsertBases,
            (isRc ? '-' : '+'));
    if (reportTargetStrand)
        fprintf(f, "%c", (targetIsRc ? '-' : '+') );
    fprintf(f, "\t%s\t%d\t%d\t%d\t"
            "%s\t%d\t%d\t%d\t%d\t",
            qSeq->name, qSeq->size, qStart, qEnd,
            chromName, chromSize, tStart, tEnd,
            ffAliCount(ali));

    /* Block sizes, then n-side starts, then h-side starts. */
    for (block = ali; block != NULL; block = block->right)
        fprintf(f, "%ld,", (long)(block->nEnd - block->nStart));
    fprintf(f, "\t");
    for (block = ali; block != NULL; block = block->right)
        fprintf(f, "%ld,", (long)(block->nStart - qBase));
    fprintf(f, "\t");
    for (block = ali; block != NULL; block = block->right)
        fprintf(f, "%d,", trans3GenoPos(block->hStart, tSeq, t3List, FALSE) + chromOffset);

    if (saveSeq)
    {
        /* pslX extras: raw n-side block sequences, then h-side ones. */
        fputc('\t', f);
        for (block = ali; block != NULL; block = block->right)
        {
            mustWrite(f, block->nStart, block->nEnd - block->nStart);
            fputc(',', f);
        }
        fputc('\t', f);
        for (block = ali; block != NULL; block = block->right)
        {
            mustWrite(f, block->hStart, block->hEnd - block->hStart);
            fputc(',', f);
        }
    }
    fprintf(f, "\n");

    if (ferror(f))
    {
        perror("");
        errAbort("Write error to .psl");
    }
}
Beispiel #7
0
static void saveAxtBundle(char *chromName, int chromSize, int chromOffset,
                          struct ffAli *ali,
                          struct dnaSeq *tSeq, struct hash *t3Hash, struct dnaSeq *qSeq,
                          boolean qIsRc, boolean tIsRc,
                          enum ffStringency stringency, int minMatch, struct gfOutput *out)
/* Convert an alignment into a new axtBundle and push it onto the bundle
 * list held in out->data.  The block list is split with ffNextBreak (max
 * insert 8); each run of blocks becomes one axt whose symbol strings are
 * the block sequences with single-sided gaps filled by '-' on the opposite
 * side.  Aborts through errAbort if a gap inside a run is negative.
 * stringency and minMatch are not used in this function. */
{
    struct axtData *ad = out->data;
    struct dyString *q = newDyString(1024), *t = newDyString(1024);
    struct axtBundle *bundle;
    struct trans3 *t3List = NULL;
    struct ffAli *runStart, *runEnd;
    struct ffAli *lastInRun = NULL;

    if (t3Hash != NULL)
        t3List = hashMustFindVal(t3Hash, tSeq->name);

    AllocVar(bundle);
    bundle->tSize = chromSize;
    bundle->qSize = qSeq->size;

    runStart = ali;
    while (runStart != NULL)
    {
        struct ffAli *block;
        struct axt *axt;

        runEnd = ffNextBreak(runStart, 8, tSeq, t3List);
        dyStringClear(q);
        dyStringClear(t);

        /* Build the two symbol strings for the blocks in [runStart, runEnd). */
        for (block = runStart; block != runEnd; block = block->right)
        {
            struct ffAli *next = block->right;

            dyStringAppendN(q, block->nStart, block->nEnd - block->nStart);
            dyStringAppendN(t, block->hStart, block->hEnd - block->hStart);

            if (next != runEnd)
            {
                int needleGap = next->nStart - block->nEnd;
                int nextStart = trans3GenoPos(next->hStart, tSeq, t3List, FALSE)
                                + chromOffset;
                int thisEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE)
                              + chromOffset;
                int hayGap = nextStart - thisEnd;
                int gap = max(needleGap, hayGap);

                if (needleGap < 0 || hayGap < 0)
                {
                    errAbort("Negative gap size in %s vs %s", tSeq->name, qSeq->name);
                }

                /* Copy the gap bases on the side that has them and pad
                 * the other side with dashes. */
                if (needleGap == gap)
                {
                    dyStringAppendN(q, block->nEnd, gap);
                    dyStringAppendMultiC(t, '-', gap);
                }
                else
                {
                    dyStringAppendN(t, block->hEnd, gap);
                    dyStringAppendMultiC(q, '-', gap);
                }
            }
            lastInRun = block;	/* Remember the final block of this run */
        }
        assert(t->stringSize == q->stringSize);

        /* Fill in the axt record for this run. */
        AllocVar(axt);
        axt->qName = cloneString(qSeq->name);
        axt->qStart = runStart->nStart - qSeq->dna;
        axt->qEnd = lastInRun->nEnd - qSeq->dna;
        axt->qStrand = (qIsRc ? '-' : '+');
        axt->tName = cloneString(chromName);
        axt->tStart = trans3GenoPos(runStart->hStart, tSeq, t3List, FALSE) + chromOffset;
        axt->tEnd = trans3GenoPos(lastInRun->hEnd, tSeq, t3List, TRUE) + chromOffset;
        axt->tStrand = (tIsRc ? '-' : '+');
        axt->symCount = t->stringSize;
        axt->qSym = cloneString(q->string);
        axt->tSym = cloneString(t->string);
        axt->frame = trans3Frame(runStart->hStart, t3List);
        if (out->qIsProt)
        {
            axt->score = axtScoreProteinDefault(axt);
        }
        else
        {
            axt->score = axtScoreDnaDefault(axt);
        }
        slAddHead(&bundle->axtList, axt);

        runStart = runEnd;
    }

    /* Records were pushed on the head, so restore left-to-right order. */
    slReverse(&bundle->axtList);
    dyStringFree(&q);
    dyStringFree(&t);
    slAddHead(&ad->bundleList, bundle);
}