SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path)
	{
	const unsigned uLengthA = seqA.Length();
	const unsigned uLengthB = seqB.Length();
	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

	AllocDPMem(uLengthA, uLengthB);

	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;
	SCORE **MxRowA = DPM.MxRowA;
	unsigned *LettersB = DPM.LettersB;

	RowFromSeq(seqA, MxRowA);
	LettersFromSeq(seqB, LettersB);

	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

#if	DEBUG
	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
#endif

// Special case for i=0
	TraceBack[0][0] = 0;
	MPrev[0] = MxRowA[0][LettersB[0]];

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		unsigned uLetterB = LettersB[j];

	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		MPrev[j] = MxRowA[0][uLetterB] + g_scoreGapOpen/2; // term gaps half
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));

		const SCORE *RowA = MxRowA[i];
		const SCORE *ptrRowA = MxRowA[i];
		const SCORE *ptrMCurrEnd = ptrMCurr_j + uLengthB;
		unsigned *ptrLettersB = LettersB;
		for (; ptrMCurr_j != ptrMCurrEnd; ++ptrMCurr_j)
			{
			*ptrMCurr_j = RowA[*ptrLettersB];
			++ptrLettersB;
			}

		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		ptrMCurr_j = MCurr;
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += g_scoreGapOpen/2;	// term gaps half

		++ptrMCurr_j;

		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + g_scoreGapOpen;
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

		unsigned uInsertPos;

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			SCORE INew = MPrev_j + g_scoreGapOpen;
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1;
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + g_scoreGapOpen;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

	unsigned uInsertPos;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] - g_scoreGapOpen/2;	// term gaps half
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev - g_scoreGapOpen/2;
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}
Esempio n. 2
0
SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	if (ALPHA_DNA != g_Alpha || ALPHA_RNA == g_Alpha)
		Quit("GlobalAlignSPN: must be nucleo");

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

	AllocDPMem(uLengthA, uLengthB);

	SCORE *GapOpenA = DPM.GapOpenA;
	SCORE *GapOpenB = DPM.GapOpenB;
	SCORE *GapCloseA = DPM.GapCloseA;
	SCORE *GapCloseB = DPM.GapCloseB;

	unsigned **SortOrderA = DPM.SortOrderA;
	FCOUNT **FreqsA = DPM.FreqsA;
	SCORE **ScoreMxB = DPM.ScoreMxB;
	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;
	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

	for (unsigned i = 0; i < uLengthA; ++i)
		{
		GapOpenA[i] = PA[i].m_scoreGapOpen;
		GapCloseA[i] = PA[i].m_scoreGapClose;

		for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
			{
			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
			}
		}

	for (unsigned j = 0; j < uLengthB; ++j)
		{
		GapOpenB[j] = PB[j].m_scoreGapOpen;
		GapCloseB[j] = PB[j].m_scoreGapClose;
		}

	for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
		{
		for (unsigned j = 0; j < uLengthB; ++j)
			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
		}

	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

// Special case for i=0
	unsigned **ptrSortOrderA = SortOrderA;
	FCOUNT **ptrFreqsA = FreqsA;
	assert(ptrSortOrderA == &(SortOrderA[0]));
	assert(ptrFreqsA == &(FreqsA[0]));
	TraceBack[0][0] = 0;

	SCORE scoreSum = 0;
	unsigned *ptrSortOrderAi = SortOrderA[0];
	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
	FCOUNT *ptrFreqsAi = FreqsA[0];
	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
		{
		const unsigned uLetter = *ptrSortOrderAi;
		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
		if (0 == fcLetter)
			break;
		scoreSum += fcLetter*ScoreMxB[uLetter][0];
		}
	MPrev[0] = scoreSum - g_scoreCenter;

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		SCORE scoreSum = 0;
		unsigned *ptrSortOrderAi = SortOrderA[0];
		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
		FCOUNT *ptrFreqsAi = FreqsA[0];
		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			scoreSum += fcLetter*ScoreMxB[uLetter][j];
			}
		MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
		++ptrSortOrderA;
		++ptrFreqsA;
		assert(ptrSortOrderA == &(SortOrderA[i]));
		assert(ptrFreqsA == &(FreqsA[i]));

		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
		const FCOUNT *FreqsAi = *ptrFreqsA;

		const unsigned *SortOrderAi = *ptrSortOrderA;
		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 4;
		const SCORE *ptrMCurrMax = MCurr + uLengthB;
		for (const unsigned *ptrSortOrderAi = SortOrderAi;
		  ptrSortOrderAi != ptrSortOrderAiEnd;
		  ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			SCORE *NSBR_Letter = ScoreMxB[uLetter];
			const FCOUNT fcLetter = FreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			SCORE *ptrNSBR = NSBR_Letter;
			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
				*ptrMCurr += fcLetter*(*ptrNSBR++);
			}

		for (unsigned j = 0; j < uLengthB; ++j)
			MCurr[j] -= g_scoreCenter;

		ptrMCurr_j = MCurr;
		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

		++ptrMCurr_j;

		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

		unsigned uInsertPos;
		const SCORE scoreGapOpenAi = GapOpenA[i];
		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			SCORE INew = MPrev_j + GapOpenB[j];
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			assert(ptrSortOrderA == &(SortOrderA[i]));
			assert(ptrFreqsA == &(FreqsA[i]));

			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + scoreGapOpenAi;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

	unsigned uInsertPos;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1] + GapOpenB[j];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}