static struct ffAli *ffFindExtendNmers(char *nStart, char *nEnd, char *hStart, char *hEnd, int seedSize) /* Find perfectly matching n-mers and extend them. */ { struct lm *lm = lmInit(32*1024); struct seqHashEl **hashTable, *hashEl, **hashSlot; struct ffAli *ffList = NULL, *ff; char *n = nStart, *h = hStart, *ne = nEnd - seedSize, *he = hEnd - seedSize; /* Hash the needle. */ lmAllocArray(lm, hashTable, 4*1024); while (n <= ne) { if (!totalDegenerateN(n, seedSize)) { hashSlot = ffHashFuncN(n, seedSize) + hashTable; lmAllocVar(lm, hashEl); hashEl->seq = n; slAddHead(hashSlot, hashEl); } ++n; } /* Scan the haystack adding hits. */ while (h <= he) { for (hashEl = hashTable[ffHashFuncN(h, seedSize)]; hashEl != NULL; hashEl = hashEl->next) { if (memcmp(hashEl->seq, h, seedSize) == 0) { AllocVar(ff); ff->hStart = h; ff->hEnd = h + seedSize; ff->nStart = hashEl->seq; ff->nEnd = hashEl->seq + seedSize; extendExactLeft(ff->nStart - nStart, ff->hStart - hStart, &ff->nStart, &ff->hStart); extendExactRight(nEnd - ff->nEnd, hEnd - ff->hEnd, &ff->nEnd, &ff->hEnd); ff->left = ffList; ffList = ff; } } ++h; } ffList = ffMakeRightLinks(ffList); ffList = ffMergeClose(ffList, nStart, hStart); lmCleanup(&lm); return ffList; }
void ssStitch(struct ssBundle *bundle, enum ffStringency stringency, int minScore, int maxToReturn) /* Glue together mrnas in bundle as much as possible. Returns number of * alignments after stitching. Updates bundle->ffList with stitched * together version. */ { struct dnaSeq *qSeq = bundle->qSeq; struct dnaSeq *genoSeq = bundle->genoSeq; struct ffAli *ffList = NULL; struct ssFfItem *ffl; struct ffAli *bestPath; int score; boolean firstTime = TRUE; if (bundle->ffList == NULL) return; /* The score may improve when we stitch together more alignments, * so don't let minScore be too harsh at this stage. */ if (minScore > 20) minScore = 20; /* Create ffAlis for all in bundle and move to one big list. */ for (ffl = bundle->ffList; ffl != NULL; ffl = ffl->next) { ffCat(&ffList, &ffl->ff); } slFreeList(&bundle->ffList); ffAliSort(&ffList, ffCmpHitsNeedleFirst); ffList = ffMergeClose(ffList, qSeq->dna, genoSeq->dna); while (ffList != NULL) { ssFindBest(ffList, qSeq, genoSeq, stringency, bundle->isProt, bundle->t3List, &bestPath, &score, &ffList); bestPath = ffMergeNeedleAlis(bestPath, TRUE); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); if (!bestPath) { ffFreeAli(&ffList); break; } bestPath = ffMergeHayOverlaps(bestPath); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); bestPath = forceMonotonic(bestPath, qSeq, genoSeq, stringency, bundle->isProt, bundle->t3List); if (firstTime && stringency == ffCdna && bundle->avoidFuzzyFindKludge == FALSE) { /* Only look for middle exons the first time. Next times * this might regenerate most of the first alignment... */ bestPath = smallMiddleExons(bestPath, bundle, stringency); } bestPath = ffMergeNeedleAlis(bestPath, TRUE); if (ffIntronMax != ffIntronMaxDefault) { bestPath = cutAtBigIntrons(bestPath, ffIntronMax, &score, stringency, bundle->isProt, genoSeq, bundle->t3List, &ffList); } if (!bundle->isProt) ffSlideIntrons(bestPath); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); if (score >= minScore) { AllocVar(ffl); ffl->ff = bestPath; slAddHead(&bundle->ffList, ffl); } else { ffFreeAli(&bestPath); ffFreeAli(&ffList); break; } firstTime = FALSE; if (--maxToReturn <= 0) { ffFreeAli(&ffList); break; } } slReverse(&bundle->ffList); return; }