static struct ffAli *scanForSmallerExons( int seedSize,
        struct dnaSeq *qSeq, struct dnaSeq *tSeq,
        boolean isRc, struct ffAli *ffList)
/* Look for exons too small to be caught by index.
 * Runs scanTinyOne over three kinds of regions: from the start of both
 * sequences up to the first block, between each pair of neighboring
 * blocks, and from the last block to the end of both sequences.
 * Everything found is collected and handed to foldInExtras together
 * with ffList; the result of foldInExtras is returned.
 * Returns NULL without scanning when ffList is NULL. */
{
struct ffAli *found = NULL;     /* Everything the scans turn up. */
struct ffAli *hit;              /* Result of the most recent scan. */
struct ffAli *prev, *cur;

if (ffList == NULL)
    return NULL;

/* Region in front of the first block. */
hit = scanTinyOne(qSeq->dna, ffList->nStart, tSeq->dna, ffList->hStart,
        isRc, TRUE, FALSE, seedSize);
ffCat(&found, &hit);

/* Regions between consecutive blocks.  When the loop finishes prev
 * is the last block of the list. */
for (prev = ffList; (cur = prev->right) != NULL; prev = cur)
    {
    hit = scanTinyOne(prev->nEnd, cur->nStart, prev->hEnd, cur->hStart,
            isRc, FALSE, FALSE, seedSize);
    ffCat(&found, &hit);
    }

/* Region behind the last block. */
hit = scanTinyOne(prev->nEnd, qSeq->dna + qSeq->size,
        prev->hEnd, tSeq->dna + tSeq->size,
        isRc, FALSE, TRUE, seedSize);
ffCat(&found, &hit);

return foldInExtras(qSeq, tSeq, ffList, found);
}
struct ffAli *cutAtBigIntrons(struct ffAli *ffList, int maxIntron,
        int *pScore, enum ffStringency stringency,
        boolean isProt, bioSeq *tSeq, struct trans3 *t3List,
        struct ffAli **returnLeftovers)
/* Return ffList up to the first intron that's too big.
 * Put the rest of the blocks back onto the leftovers list.
 *
 * The gap between two neighboring blocks is measured as
 * trans3GenoPos() of the right block's hStart minus trans3GenoPos()
 * of the left block's hEnd; the list is cut at the first gap that is
 * greater than maxIntron.
 *
 * When a cut is made the tail is detached, passed to ffCat on
 * *returnLeftovers, and *pScore is recomputed for the shortened list
 * (ffScoreProtein if isProt, ffScore otherwise).  When no cut is made
 * neither *pScore nor *returnLeftovers is touched.
 *
 * An empty (NULL) ffList is returned as NULL with no other effect. */
{
struct ffAli *prevFf, *ff, *cutFf = NULL;

/* Nothing to cut in an empty list; also keeps the loop set-up below
 * from dereferencing a NULL pointer. */
if (ffList == NULL)
    return NULL;

prevFf = ffList;
for (ff = prevFf->right; ff != NULL; ff = ff->right)
    {
    int nhStart = trans3GenoPos(    ff->hStart, tSeq, t3List, FALSE);
    int ohEnd   = trans3GenoPos(prevFf->hEnd  , tSeq, t3List, TRUE);
    int dt = nhStart - ohEnd;
    if (dt > maxIntron)
        {
        cutFf = prevFf;
        break;
        }
    prevFf = ff;
    }

if (cutFf != NULL)
    {
    /* cutFf->right is the block that tripped the limit, so it is
     * never NULL here. */
    ff = cutFf->right;
    cutFf->right = NULL;
    ff->left = NULL;
    ffCat(returnLeftovers, &ff);
    if (isProt)
        *pScore = ffScoreProtein(ffList, stringency);
    else
        *pScore = ffScore(ffList, stringency);
    }
return ffList;
}
static void bandExtAfter(struct axtScoreScheme *ss, struct ffAli *ff,
        int qGap, int tGap, struct ffAli **pExtraList)
/* Add in blocks from a banded extension after ff into the gap
 * and append results if any to *pExtraList.
 * Does nothing unless both gaps are positive.  Each gap is limited to
 * twice the smaller of the two before the extension is attempted, and
 * that same limit is passed as the last argument of bandExtFf. */
{
struct ffAli *ext;
int shortGap = (qGap < tGap ? qGap : tGap);
int gapCap = shortGap * 2;
int qSpan = qGap;
int tSpan = tGap;

if (shortGap <= 0)
    return;

if (qSpan > gapCap)
    qSpan = gapCap;
if (tSpan > gapCap)
    tSpan = gapCap;

ext = bandExtFf(NULL, ss, 3, ff,
        ff->nEnd, ff->nEnd + qSpan,
        ff->hEnd, ff->hEnd + tSpan,
        1, gapCap);
ffCat(pExtraList, &ext);
}
static struct ffAli *scanForTinyInternal(struct dnaSeq *qSeq, struct dnaSeq *tSeq,
        boolean isRc, struct ffAli *ffList)
/* Look for exons too small to be caught by index.
 * Only the regions between neighboring blocks of ffList are examined;
 * each one is passed to fillInExact.  Whatever that produces is
 * collected and handed to foldInExtras together with ffList, and the
 * result of foldInExtras is returned.
 * Returns NULL when ffList is NULL. */
{
struct ffAli *found = NULL;     /* Everything fillInExact turns up. */
struct ffAli *hit;
struct ffAli *prev, *cur;

if (ffList == NULL)
    return NULL;

/* Walk each adjacent pair (prev, cur) of blocks. */
for (prev = ffList; (cur = prev->right) != NULL; prev = cur)
    {
    hit = fillInExact(prev->nEnd, cur->nStart, prev->hEnd, cur->hStart,
            isRc, FALSE, FALSE, 0);
    ffCat(&found, &hit);
    }

return foldInExtras(qSeq, tSeq, ffList, found);
}
static struct ffAli *findFromSmallerSeeds(char *nStart, char *nEnd,
        char *hStart, char *hEnd, boolean isRc,
        boolean scanLeft, boolean scanRight, int seedSize, int resolveLimit)
/* Look for matches with smaller seeds.
 * Returns NULL if the nStart..nEnd region is shorter than seedSize or
 * if ffFindExtendNmers finds nothing.  Otherwise returns the list from
 * ffFindExtendNmers with the banded extensions of every block added
 * to it via ffCat.
 * When scanLeft or scanRight is set the hStart..hEnd region is first
 * limited to at most seedResolvePower(seedSize, resolveLimit) bases:
 * scanLeft moves hStart up toward hEnd, scanRight moves hEnd down
 * toward hStart.  isRc is not used here. */
{
struct ffAli *hits, *block;
struct ffAli *grown = NULL;     /* Extensions collected from all blocks. */
struct axtScoreScheme *scheme;
int queryLen = nEnd - nStart;

if (queryLen < seedSize)
    return NULL;

if (scanLeft || scanRight)
    {
    int window = hEnd - hStart;
    int cap = seedResolvePower(seedSize, resolveLimit);
    if (window > cap)
        window = cap;
    /* Order matters if both flags are set: hStart moves first and the
     * new hStart is what hEnd is then computed from. */
    if (scanLeft)
        hStart = hEnd - window;
    if (scanRight)
        hEnd = hStart + window;
    }

hits = ffFindExtendNmers(nStart, nEnd, hStart, hEnd, seedSize);
if (hits == NULL)
    return NULL;

scheme = axtScoreSchemeRnaDefault();
for (block = hits; block != NULL; block = block->right)
    {
    bandExtBefore(scheme, block, block->nStart - nStart,
            block->hStart - hStart, &grown);
    bandExtAfter(scheme, block, nEnd - block->nEnd,
            hEnd - block->hEnd, &grown);
    }
ffCat(&hits, &grown);
return hits;
}
void ssStitch(struct ssBundle *bundle, enum ffStringency stringency, int minScore, int maxToReturn) /* Glue together mrnas in bundle as much as possible. Returns number of * alignments after stitching. Updates bundle->ffList with stitched * together version. */ { struct dnaSeq *qSeq = bundle->qSeq; struct dnaSeq *genoSeq = bundle->genoSeq; struct ffAli *ffList = NULL; struct ssFfItem *ffl; struct ffAli *bestPath; int score; boolean firstTime = TRUE; if (bundle->ffList == NULL) return; /* The score may improve when we stitch together more alignments, * so don't let minScore be too harsh at this stage. */ if (minScore > 20) minScore = 20; /* Create ffAlis for all in bundle and move to one big list. */ for (ffl = bundle->ffList; ffl != NULL; ffl = ffl->next) { ffCat(&ffList, &ffl->ff); } slFreeList(&bundle->ffList); ffAliSort(&ffList, ffCmpHitsNeedleFirst); ffList = ffMergeClose(ffList, qSeq->dna, genoSeq->dna); while (ffList != NULL) { ssFindBest(ffList, qSeq, genoSeq, stringency, bundle->isProt, bundle->t3List, &bestPath, &score, &ffList); bestPath = ffMergeNeedleAlis(bestPath, TRUE); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); if (!bestPath) { ffFreeAli(&ffList); break; } bestPath = ffMergeHayOverlaps(bestPath); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); bestPath = forceMonotonic(bestPath, qSeq, genoSeq, stringency, bundle->isProt, bundle->t3List); if (firstTime && stringency == ffCdna && bundle->avoidFuzzyFindKludge == FALSE) { /* Only look for middle exons the first time. Next times * this might regenerate most of the first alignment... 
*/ bestPath = smallMiddleExons(bestPath, bundle, stringency); } bestPath = ffMergeNeedleAlis(bestPath, TRUE); if (ffIntronMax != ffIntronMaxDefault) { bestPath = cutAtBigIntrons(bestPath, ffIntronMax, &score, stringency, bundle->isProt, genoSeq, bundle->t3List, &ffList); } if (!bundle->isProt) ffSlideIntrons(bestPath); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); if (score >= minScore) { AllocVar(ffl); ffl->ff = bestPath; slAddHead(&bundle->ffList, ffl); } else { ffFreeAli(&bestPath); ffFreeAli(&ffList); break; } firstTime = FALSE; if (--maxToReturn <= 0) { ffFreeAli(&ffList); break; } } slReverse(&bundle->ffList); return; }