Beispiel #1
0
static struct ffAli *ffFindExtendNmers(char *nStart, char *nEnd, char *hStart, char *hEnd,
	int seedSize)
/* Find perfectly matching n-mers and extend them. */
{
struct lm *lm = lmInit(32*1024);
struct seqHashEl **hashTable, *hashEl, **hashSlot;
struct ffAli *ffList = NULL, *ff;
char *n = nStart, *h = hStart, *ne = nEnd - seedSize, *he = hEnd - seedSize;

/* Hash the needle. */
lmAllocArray(lm, hashTable, 4*1024);
while (n <= ne)
    {
    if (!totalDegenerateN(n, seedSize))
	{
	hashSlot = ffHashFuncN(n, seedSize) + hashTable;
	lmAllocVar(lm, hashEl);
	hashEl->seq = n;
	slAddHead(hashSlot, hashEl);
	}
    ++n;
    }

/* Scan the haystack adding hits. */
while (h <= he)
    {
    for (hashEl = hashTable[ffHashFuncN(h, seedSize)]; 
    	hashEl != NULL; hashEl = hashEl->next)
	{
	if (memcmp(hashEl->seq, h, seedSize) == 0)
	    {
	    AllocVar(ff);
	    ff->hStart = h;
	    ff->hEnd = h + seedSize;
	    ff->nStart = hashEl->seq;
	    ff->nEnd = hashEl->seq + seedSize;
	    extendExactLeft(ff->nStart - nStart, ff->hStart - hStart, 
		&ff->nStart, &ff->hStart);
	    extendExactRight(nEnd - ff->nEnd, hEnd - ff->hEnd, &ff->nEnd, &ff->hEnd);
	    ff->left = ffList;
	    ffList = ff;
	    }
	}
    ++h;
    }
ffList = ffMakeRightLinks(ffList);
ffList = ffMergeClose(ffList, nStart, hStart);
lmCleanup(&lm);
return ffList;
}
Beispiel #2
0
void ssStitch(struct ssBundle *bundle, enum ffStringency stringency, 
	int minScore, int maxToReturn)
/* Glue together mrnas in bundle as much as possible. Returns number of
 * alignments after stitching. Updates bundle->ffList with stitched
 * together version. */
{
struct dnaSeq *qSeq =  bundle->qSeq;
struct dnaSeq *genoSeq = bundle->genoSeq;
struct ffAli *ffList = NULL;
struct ssFfItem *ffl;
struct ffAli *bestPath;
int score;
boolean firstTime = TRUE;

if (bundle->ffList == NULL)
    return;

/* The score may improve when we stitch together more alignments,
 * so don't let minScore be too harsh at this stage. */
if (minScore > 20)
    minScore = 20;

/* Create ffAlis for all in bundle and move to one big list. */
for (ffl = bundle->ffList; ffl != NULL; ffl = ffl->next)
    {
    ffCat(&ffList, &ffl->ff);
    }
slFreeList(&bundle->ffList);

ffAliSort(&ffList, ffCmpHitsNeedleFirst);
ffList = ffMergeClose(ffList, qSeq->dna, genoSeq->dna);

while (ffList != NULL)
    {

    ssFindBest(ffList, qSeq, genoSeq, stringency, 
    	bundle->isProt, bundle->t3List,
    	&bestPath, &score, &ffList);


    bestPath = ffMergeNeedleAlis(bestPath, TRUE);
    bestPath = ffRemoveEmptyAlis(bestPath, TRUE);
    if (!bestPath)
	{
	ffFreeAli(&ffList);
	break;
	}
    bestPath = ffMergeHayOverlaps(bestPath);
    bestPath = ffRemoveEmptyAlis(bestPath, TRUE);
    bestPath = forceMonotonic(bestPath, qSeq, genoSeq, stringency,
    	bundle->isProt, bundle->t3List);

    if (firstTime && stringency == ffCdna && bundle->avoidFuzzyFindKludge == FALSE)
	{
	/* Only look for middle exons the first time.  Next times
	 * this might regenerate most of the first alignment... */
	bestPath = smallMiddleExons(bestPath, bundle, stringency);
	}
    bestPath = ffMergeNeedleAlis(bestPath, TRUE);
    if (ffIntronMax != ffIntronMaxDefault)
	{
	bestPath = cutAtBigIntrons(bestPath, ffIntronMax, &score, stringency, 
		bundle->isProt, genoSeq, bundle->t3List,
		&ffList);
	}
    if (!bundle->isProt)
	ffSlideIntrons(bestPath);
    bestPath = ffRemoveEmptyAlis(bestPath, TRUE);
    if (score >= minScore)
	{
	AllocVar(ffl);
	ffl->ff = bestPath;
	slAddHead(&bundle->ffList, ffl);
	}
    else
	{
	ffFreeAli(&bestPath);
	ffFreeAli(&ffList);
	break;
	}
    firstTime = FALSE;
    if (--maxToReturn <= 0)
	{
	ffFreeAli(&ffList);
	break;
	}
    }
slReverse(&bundle->ffList);
return;
}