Example #1
0
struct ffAli *smallMiddleExons(struct ffAli *aliList, 
	struct ssBundle *bundle, 
	enum ffStringency stringency)
/* Look for small exons in the middle.
 *
 * Walks each pair of neighbouring blocks in aliList.  Wherever both the
 * needle (n) and haystack (h) gaps between the pair are at least 3 bases,
 * ffFind() is run on just that gap; whatever it finds is passed through
 * forceMonotonic() and spliced into the list between the two blocks.
 *
 * Returns the (unchanged) head of the list.  An empty list is returned
 * as-is, and bundles with a t3List are left untouched. */
{
struct dnaSeq *qSeq, *genoSeq;
struct ffAli *left, *right, *newLeft, *newRight;

if (aliList == NULL)
    return NULL;	/* Nothing to search between. */
if (bundle->t3List != NULL)
    return aliList;	/* Can't handle intense translated stuff. */

qSeq = bundle->qSeq;
genoSeq = bundle->genoSeq;

/* Inside the loop both left and right are always non-NULL: left starts
 * at the (non-NULL) head and afterwards trails right by one original
 * block, so new blocks are always spliced strictly between the two and
 * the head of the list never changes. */
left = aliList;
for (right = aliList->right; right != NULL; right = right->right)
    {
    if (right->hStart - left->hEnd >= 3 && right->nStart - left->nEnd >= 3)
        {
        newLeft = ffFind(left->nEnd, right->nStart, left->hEnd, right->hStart, stringency);
        if (newLeft != NULL)
            newLeft = forceMonotonic(newLeft, qSeq, genoSeq, 
                stringency, bundle->isProt, bundle->t3List);
        /* forceMonotonic's contract is not visible here; skip the splice
         * rather than dereference NULL if it hands back an empty list. */
        if (newLeft != NULL)
            {
            newRight = ffRightmost(newLeft);
            left->right = newLeft;
            newLeft->left = left;
            right->left = newRight;
            newRight->right = right;
            }
        }
    /* Advance by original blocks only; freshly inserted ones are skipped. */
    left = right;
    }
return aliList;
}
Example #2
0
void showBundle(struct ssBundle *bun, boolean isRc)
/* Display a bundle for user.
 *
 * Prints one line to stdout per alignment in bun->ffList giving the
 * haystack (genoSeq) and needle (qSeq) ranges as offsets from the start
 * of each sequence, the strand ('-' if isRc, else '+') and the ffTight
 * score of the alignment. */
{
struct ssFfItem *ffi;

for (ffi = bun->ffList; ffi != NULL; ffi = ffi->next)
    {
    struct ffAli *left, *right;
    int score;
    DNA *needle = bun->qSeq->dna;
    DNA *hay = bun->genoSeq->dna;
    left = ffi->ff;
    right = ffRightmost(left);
    score = ffScore(left, ffTight);
    /* Pointer differences have type ptrdiff_t, which does not match %d;
     * cast to long and print with %ld so the arguments agree with the
     * format on every platform. */
    printf("%s:%ld-%ld of %d %s:%ld-%ld of %d strand %c score %d\n",
	bun->genoSeq->name, (long)(left->hStart - hay), (long)(right->hEnd - hay), bun->genoSeq->size,
	bun->qSeq->name, (long)(left->nStart - needle), (long)(right->nEnd - needle), bun->qSeq->size,	
	(isRc ? '-' : '+'), score);
    }
}
Example #3
0
static boolean smoothOneGap(struct ffAli *left, struct ffAli *right, struct ffAli *ffList)
/* If necessary connect left and right with a small intermediate
 * alignment in between.  Only gaps of 1 to 9 bases on both sequences
 * whose sizes differ by at most 3 are considered; directly abutting
 * blocks are not merged here.  The bridge is kept only if its score is
 * at least the (negated, scaled) cDNA gap penalty for the gap.
 * ffList is not referenced by this routine.  Returns TRUE if any
 * smoothing was done. */ 
{
struct axtScoreScheme *scheme;
struct ffAli *bridgeFirst, *bridgeLast;
char hayText[20], needleText[20];
int textLen;
int penalty, bridgeScore;
int imbalance;
int needleGap = right->nStart - left->nEnd;
int hayGap = right->hStart - left->hEnd;

/* Both gaps must be present and short. */
if (needleGap <= 0 || hayGap <= 0 || needleGap >= 10 || hayGap >= 10)
    return FALSE;

/* The two gaps must be of similar length. */
imbalance = needleGap - hayGap;
if (imbalance < 0)
    imbalance = -imbalance;
if (imbalance > 3)
    return FALSE;

/* Try a banded alignment across the gap. */
scheme = axtScoreSchemeRnaDefault();
if (!bandExt(TRUE, scheme, 3, left->nEnd, needleGap, left->hEnd, hayGap, 1,
	sizeof(hayText), &textLen, needleText, hayText, NULL, NULL))
    return FALSE;

/* Keep the bridge only if it scores no worse than leaving the gap. */
penalty = -ffCalcCdnaGapPenalty(hayGap, needleGap) * scheme->matrix['a']['a'];
bridgeScore = axtScoreSym(scheme, textLen, needleText, hayText);
if (bridgeScore < penalty)
    return FALSE;

/* Splice the new block(s) between left and right. */
bridgeFirst = ffAliFromSym(textLen, needleText, hayText, NULL, left->nEnd, left->hEnd);
bridgeLast = ffRightmost(bridgeFirst);
left->right = bridgeFirst;
bridgeFirst->left = left;
bridgeLast->right = right;
right->left = bridgeLast;
return TRUE;
}
Example #4
0
static struct ffAli *trimFlakyEnds(struct dnaSeq *qSeq, struct dnaSeq *tSeq,
	struct ffAli *ffList)
/* Drop small leading and trailing blocks that look like chance
 * alignments.  An end block is freed when the penalty for the gap
 * separating it from its neighbour (trimGapPenalty, which is handed the
 * intron orientation) is at least the block's match score less its
 * aPenalty.  Trimming at each end stops at the first block that
 * survives.  qSeq and tSeq are not referenced in this routine.
 * Returns the possibly new head of the list. */
{
int orientation = ffIntronOrientation(ffList);
struct ffAli *edge, *inner;
char *gapStart, *gapEnd;
int edgeScore, gapCost;

/* A list with fewer than two blocks has no end gaps to judge. */
if (ffAliCount(ffList) < 2)
    return ffList;

/* Trim from the front: edge is the current first block. */
for (edge = ffList, inner = ffList->right; inner != NULL; 
	edge = inner, inner = inner->right)
    {
    edgeScore = ffScoreMatch(edge->nStart, edge->hStart, 
	edge->nEnd - edge->nStart);
    edgeScore -= aPenalty(edge->nStart, edge->nEnd - edge->nStart);
    gapStart = edge->hEnd;
    gapEnd = inner->hStart;
    gapCost = trimGapPenalty(gapEnd - gapStart, 
	inner->nStart - edge->nEnd, gapStart, gapEnd, orientation);
    if (gapCost < edgeScore)
	break;
    freeMem(edge);
    ffList = inner;
    inner->left = NULL;
    }

/* Trim from the back: edge is the current last block. */
edge = ffRightmost(ffList);
if (edge == ffList)
    return ffList;
for (inner = edge->left; inner != NULL; edge = inner, inner = inner->left)
    {
    edgeScore = ffScoreMatch(edge->nStart, edge->hStart, 
	edge->nEnd - edge->nStart);
    edgeScore -= aPenalty(edge->nStart, edge->nEnd - edge->nStart);
    gapStart = inner->hEnd;
    gapEnd = edge->hStart;
    gapCost = trimGapPenalty(gapEnd - gapStart, 
	edge->nStart - inner->nEnd, gapStart, gapEnd, orientation);
    if (gapCost < edgeScore)
	break;
    freeMem(edge);
    inner->right = NULL;
    }
return ffList;
}
void oneAli(struct ffAli *left, struct dnaSeq *otherSeq, 
	struct repeatTracker *rt, boolean isRc, enum ffStringency stringency, FILE *out)
/* Analyse one alignment and if it looks good enough write it out to file.
 *
 * left      - leftmost block of the alignment to report.
 * otherSeq  - the needle sequence the n pointers of the blocks refer to.
 * rt        - supplies the haystack sequence (rt->seq) and a per-base
 *             repeat flag array (rt->repBytes) indexed by haystack offset.
 * isRc      - if set the strand is written as '-' and the needle
 *             coordinates are flipped relative to otherSeq->size.
 * stringency - only used to tell calcMilliBad whether this is ffCdna.
 * out       - file to write the tab-separated line to.
 *
 * The pass/fail decision uses veryTight, maxBad and minBases, which are
 * not defined in this block (presumably file-level settings).  Aborts
 * via errAbort() if the stream reports a write error. */
{
struct dnaSeq *genoSeq = rt->seq;
UBYTE *repBytes = rt->repBytes;
struct ffAli *ff, *nextFf;
struct ffAli *right = ffRightmost(left);
DNA *needle = otherSeq->dna;
DNA *hay = genoSeq->dna;
int nStart = left->nStart - needle;
int nEnd = right->nEnd - needle;
int hStart = left->hStart - hay;
int hEnd = right->hEnd - hay;
int nInsertBaseCount = 0;
int nInsertCount = 0;
int hInsertBaseCount = 0;
int hInsertCount = 0;
int matchCount = 0;
int mismatchCount = 0;
int repMatch = 0;
int countNs = 0;
DNA *np, *hp, n, h;
int blockSize;
int i;
int milliBad;
int passIt;

/* Count up matches, mismatches, inserts, etc. */
for (ff = left; ff != NULL; ff = nextFf)
    {
    int blockHayOffset;	/* Offset of this block in hay; indexes repBytes. */
    nextFf = ff->right;
    blockSize = ff->nEnd - ff->nStart;
    np = ff->nStart;
    hp = ff->hStart;
    blockHayOffset = hp - hay;
    for (i=0; i<blockSize; ++i)
	{
	n = np[i];
	h = hp[i];
	if (n == 'n' || h == 'n')
	    ++countNs;
	else
	    {
	    if (n == h)
		{
		/* Matches on flagged haystack bases are tallied separately. */
		if (repBytes[i+blockHayOffset])
		    ++repMatch;
		else
		    ++matchCount;
		}
	    else
		++mismatchCount;
	    }
	}
    if (nextFf != NULL)
	{
	if (ff->nEnd != nextFf->nStart)
	    {
	    ++nInsertCount;
	    nInsertBaseCount += nextFf->nStart - ff->nEnd;
	    }
	if (ff->hEnd != nextFf->hStart)
	    {
	    ++hInsertCount;
	    hInsertBaseCount += nextFf->hStart - ff->hEnd;
	    }
	}
    }

/* See if it looks good enough to output. */
milliBad = calcMilliBad(nEnd - nStart, hEnd - hStart, nInsertCount, hInsertCount, 
	matchCount, repMatch, mismatchCount, stringency == ffCdna);
if (veryTight)
    {
    passIt = (milliBad < 60 && 
	(matchCount >= 25 || 
	 (matchCount >= 15 && matchCount + repMatch >= 50) ||
	 (matchCount >= 5 && repMatch >= 100 && milliBad < 50)));
    }
else
    {
    passIt = (milliBad < maxBad && 
	(matchCount >= minBases || 
	 (matchCount >= minBases/2 && matchCount + repMatch >= 2*minBases) ||
	 (repMatch >= 4*minBases && milliBad < (maxBad/2))));
    }
if (passIt)
    {
    if (isRc)
	{
	/* Flip only the overall needle range; the per-block needle
	 * starts written below remain offsets from needle. */
	int temp;
	int oSize = otherSeq->size;
	temp = nStart;
	nStart = oSize - nEnd;
	nEnd = oSize - temp;
	}
    /* Columns: the eight counters, strand, needle name/size/start/end,
     * haystack name/size/start/end, block count. */
    fprintf(out, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t"
                 "%c\t"
		 "%s\t%d\t%d\t%d\t"
		 "%s\t%d\t%d\t%d\t%d\t",
	matchCount, mismatchCount, repMatch, countNs, nInsertCount, nInsertBaseCount, hInsertCount, hInsertBaseCount,
	(isRc ? '-' : '+'),
	otherSeq->name, otherSeq->size, nStart, nEnd,
	genoSeq->name, genoSeq->size, hStart, hEnd,
	ffAliCount(left));
    /* Then comma-terminated lists of block sizes, needle starts and
     * haystack starts.  Pointer differences are ptrdiff_t, so cast to
     * long and use %ld to keep the arguments matching the format. */
    for (ff = left; ff != NULL; ff = ff->right)
	fprintf(out, "%ld,", (long)(ff->nEnd - ff->nStart));
    fprintf(out, "\t");
    for (ff = left; ff != NULL; ff = ff->right)
	fprintf(out, "%ld,", (long)(ff->nStart - needle));
    fprintf(out, "\t");
    for (ff = left; ff != NULL; ff = ff->right)
	fprintf(out, "%ld,", (long)(ff->hStart - hay));
    fprintf(out, "\n");
    if (ferror(out))
	{
	perror("");
	errAbort("Write error to .psl");
	}
    }
}
Example #6
0
static void savePslx(char *chromName, int chromSize, int chromOffset,
                     struct ffAli *ali, struct dnaSeq *tSeq, struct dnaSeq *qSeq,
                     boolean isRc, enum ffStringency stringency, int minMatch, FILE *f,
                     struct hash *t3Hash, boolean reportTargetStrand, boolean targetIsRc,
                     struct hash *maskHash, int minIdentity,
                     boolean qIsProt, boolean tIsProt, boolean saveSeq)
/* Analyse one alignment and, if its identity is at least minIdentity,
 * write it to f as one tab-separated psl line.  With saveSeq set the
 * query and target text of every block is appended as two extra columns
 * (pslX).  Target positions go through trans3GenoPos() and are shifted
 * by chromOffset.  minMatch, qIsProt and tIsProt are not referenced in
 * this routine.  Aborts via errAbort() on a write error. */
{
    /* Adapted from the psLayout version, extended slightly to cope with
     * protein as well as DNA alignments. */
    struct ffAli *blk;
    struct ffAli *lastBlk = ffRightmost(ali);
    DNA *qBase = qSeq->dna;
    DNA *tBase = tSeq->dna;
    int qStart = ali->nStart - qBase;
    int qEnd = lastBlk->nEnd - qBase;
    int tStart, tEnd;
    int qGapBases = 0, qGapCount = 0;
    int tGapBases = 0, tGapCount = 0;
    int matches = 0, mismatches = 0, repMatches = 0, nCount = 0;
    int gapTotal, identity;
    struct trans3 *t3List = NULL;
    Bits *maskBits = NULL;

    if (maskHash != NULL)
        maskBits = hashMustFindVal(maskHash, tSeq->name);
    if (t3Hash != NULL)
        t3List = hashMustFindVal(t3Hash, tSeq->name);
    tStart = trans3GenoPos(ali->hStart, tSeq, t3List, FALSE) + chromOffset;
    tEnd = trans3GenoPos(lastBlk->hEnd, tSeq, t3List, TRUE) + chromOffset;

    /* Tally per-base outcomes inside blocks and gaps between blocks. */
    for (blk = ali; blk != NULL; blk = blk->right)
    {
        struct ffAli *nextBlk = blk->right;
        DNA *qText = blk->nStart;
        DNA *tText = blk->hStart;
        int len = blk->nEnd - blk->nStart;
        int k;

        for (k = 0; k < len; ++k)
        {
            DNA q = qText[k];
            DNA t = tText[k];
            if (q == 'n' || t == 'n')
                ++nCount;
            else if (q != t)
                ++mismatches;
            else if (maskBits == NULL)
                ++matches;
            else
            {
                /* A match on a masked target base counts as a repeat match. */
                int tOffset = tText + k - tBase;
                if (bitReadOne(maskBits, tOffset))
                    ++repMatches;
                else
                    ++matches;
            }
        }

        if (nextBlk != NULL)
        {
            int nextTStart = trans3GenoPos(nextBlk->hStart, tSeq, t3List, FALSE) + chromOffset;
            int thisTEnd = trans3GenoPos(blk->hEnd, tSeq, t3List, TRUE) + chromOffset;
            int tGap = nextTStart - thisTEnd;
            int qGap = nextBlk->nStart - blk->nEnd;

            if (qGap != 0)
            {
                ++qGapCount;
                qGapBases += qGap;
            }
            if (tGap != 0)
            {
                ++tGapCount;
                tGapBases += tGap;
            }
        }
    }

    /* Identity in parts per thousand; each counted gap costs two matches.
     * Target gaps are not counted when stringency is ffCdna.
     * (The minMatch score filter is applied at a higher level.) */
    gapTotal = qGapCount;
    if (stringency != ffCdna)
        gapTotal += tGapCount;
    identity = roundingScale(1000, matches + repMatches - 2*gapTotal,
                             matches + repMatches + mismatches);
    if (identity < minIdentity)
        return;

    /* Flip overall ranges for reverse-strand reporting. */
    if (isRc)
    {
        int flippedStart = qSeq->size - qEnd;
        qEnd = qSeq->size - qStart;
        qStart = flippedStart;
    }
    if (targetIsRc)
    {
        int flippedStart = chromSize - tEnd;
        tEnd = chromSize - tStart;
        tStart = flippedStart;
    }

    /* Counters and strand(s). */
    fprintf(f, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c",
            matches, mismatches, repMatches, nCount,
            qGapCount, qGapBases, tGapCount, tGapBases,
            (isRc ? '-' : '+'));
    if (reportTargetStrand)
        fprintf(f, "%c", (targetIsRc ? '-' : '+') );

    /* Query and target names, sizes and ranges, then the block count. */
    fprintf(f, "\t%s\t%d\t%d\t%d\t"
            "%s\t%d\t%d\t%d\t%d\t",
            qSeq->name, qSeq->size, qStart, qEnd,
            chromName, chromSize, tStart, tEnd,
            ffAliCount(ali));

    /* Comma-terminated block sizes, query starts and target starts. */
    for (blk = ali; blk != NULL; blk = blk->right)
        fprintf(f, "%ld,", (long)(blk->nEnd - blk->nStart));
    fprintf(f, "\t");
    for (blk = ali; blk != NULL; blk = blk->right)
        fprintf(f, "%ld,", (long)(blk->nStart - qBase));
    fprintf(f, "\t");
    for (blk = ali; blk != NULL; blk = blk->right)
        fprintf(f, "%d,", trans3GenoPos(blk->hStart, tSeq, t3List, FALSE) + chromOffset);

    /* Optional pslX columns: raw text of each block, query then target. */
    if (saveSeq)
    {
        fputc('\t', f);
        for (blk = ali; blk != NULL; blk = blk->right)
        {
            mustWrite(f, blk->nStart, blk->nEnd - blk->nStart);
            fputc(',', f);
        }
        fputc('\t', f);
        for (blk = ali; blk != NULL; blk = blk->right)
        {
            mustWrite(f, blk->hStart, blk->hEnd - blk->hStart);
            fputc(',', f);
        }
    }
    fprintf(f, "\n");

    if (ferror(f))
    {
        perror("");
        errAbort("Write error to .psl");
    }
}