Ejemplo n.º 1
0
// Self-checking global alignment of two profiles.
//
// Aligns PA (uLengthA positions) against PB (uLengthB positions) twice:
// once with GlobalAlignSimple into a scratch path and once with NWSmall
// into the caller's Path. If the two paths are not equal, both are logged
// and the program terminates via Quit. Returns the score reported by
// NWSmall. When TIMING is enabled, the elapsed clock ticks are added to
// g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	TIMING
	const TICKS ticksStart = GetClockTicks();
#endif
// Reference result from the simple implementation.
	g_bKeepSimpleDP = true;
	PWPath PathFromSimple;
	GlobalAlignSimple(PA, uLengthA, PB, uLengthB, PathFromSimple);

// Result actually handed back to the caller.
	const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);

	const bool bPathsAgree = Path.Equal(PathFromSimple);
	if (!bPathsAgree)
		{
		Log("Simple:\n");
		PathFromSimple.LogMe();
		Log("Small:\n");
		Path.LogMe();
		Quit("Paths differ");
		}

#if	TIMING
	const TICKS ticksEnd = GetClockTicks();
	g_ticksDP += (ticksEnd - ticksStart);
#endif
	return scoreSmall;
	}
Ejemplo n.º 2
0
// Appends every edge of RegPath, in order, to the end of Path.
// The edge count is read once before any edge is appended.
static void AppendRegPath(PWPath &Path, const PWPath &RegPath)
	{
	const unsigned uCount = RegPath.GetEdgeCount();
	unsigned uNext = 0;
	while (uNext < uCount)
		{
		Path.AppendEdge(RegPath.GetEdge(uNext));
		++uNext;
		}
	}
Ejemplo n.º 3
0
// Replaces Path with uLengthA consecutive 'D' edges.
// Edge k (1-based) has uPrefixLengthA == k and uPrefixLengthB == 0.
static void AllDeletes(PWPath &Path, unsigned uLengthA)
	{
	Path.Clear();

	PWEdge DeleteEdge;
	DeleteEdge.uPrefixLengthB = 0;
	DeleteEdge.cType = 'D';

	for (unsigned uLenA = 1; uLenA <= uLengthA; ++uLenA)
		{
		DeleteEdge.uPrefixLengthA = uLenA;
		Path.AppendEdge(DeleteEdge);
		}
	}
Ejemplo n.º 4
0
// Replaces Path with uLengthB consecutive 'I' edges.
// Edge k (1-based) has uPrefixLengthB == k and uPrefixLengthA == 0.
static void AllInserts(PWPath &Path, unsigned uLengthB)
	{
	Path.Clear();

	PWEdge InsertEdge;
	InsertEdge.uPrefixLengthA = 0;
	InsertEdge.cType = 'I';

	for (unsigned uLenB = 1; uLenB <= uLengthB; ++uLenB)
		{
		InsertEdge.uPrefixLengthB = uLenB;
		Path.AppendEdge(InsertEdge);
		}
	}
Ejemplo n.º 5
0
static void DiagToPath(const Diag &d, PWPath &Path)
	{
	Path.Clear();
	const unsigned uLength = d.m_uLength;
	for (unsigned i = 0; i < uLength; ++i)
		{
		PWEdge Edge;
		Edge.cType = 'M';
		Edge.uPrefixLengthA = d.m_uStartPosA + i + 1;
		Edge.uPrefixLengthB = d.m_uStartPosB + i + 1;
		Path.AppendEdge(Edge);
		}
	}
Ejemplo n.º 6
0
// Adds uOffsetA / uOffsetB to the A / B prefix lengths of every edge in Path.
//
// PWPath is only read through GetEdge (bound to a const reference here), so
// the edge is modified in place by casting constness away. This deliberately
// circumvents the class interface.
static void OffsetPath(PWPath &Path, unsigned uOffsetA, unsigned uOffsetB)
	{
	const unsigned uCount = Path.GetEdgeCount();
	unsigned i = 0;
	while (i < uCount)
		{
		const PWEdge &ReadOnlyEdge = Path.GetEdge(i);

	// Nasty hack -- write through the const reference
		PWEdge &Writable = const_cast<PWEdge &>(ReadOnlyEdge);
		Writable.uPrefixLengthA += uOffsetA;
		Writable.uPrefixLengthB += uOffsetB;
		++i;
		}
	}
Ejemplo n.º 7
0
// Builds the gapped version of sequence s implied by an alignment path.
//
//   s      - ungapped input sequence
//   Path   - pair-wise path; edge types are 'M', 'D' and 'I'
//   bRight - when true, 'I' and 'D' are swapped before interpretation, i.e.
//            s is treated as the second (B) sequence of the pair
//   sOut   - cleared, given the name of s, then filled with one character
//            per edge: the next letter of s for 'M' and 'I' (after the
//            optional swap), '-' for 'D'
//
// Any other edge type terminates the program via Quit.
static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)
	{
// PathToEstrings hands back two arrays allocated with new[]. Nothing in
// this function reads them, so release them right away instead of leaking
// them on every call. The call itself is kept because PathToEstrings
// performs its own consistency checks in DEBUG builds.
	short *esA = 0;
	short *esB = 0;
	PathToEstrings(Path, &esA, &esB);
	delete[] esA;
	delete[] esB;

	const unsigned uEdgeCount = Path.GetEdgeCount();

	sOut.Clear();
	sOut.SetName(s.GetName());
	unsigned uPos = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cType = Edge.cType;
		if (bRight)
			{
		// Mirror the gap direction for the right-hand sequence
			if (cType == 'I')
				cType = 'D';
			else if (cType == 'D')
				cType = 'I';
			}
		switch (cType)
			{
		case 'M':
		case 'I':
		// Both consume one letter of s
			sOut.AppendChar(s[uPos++]);
			break;
		case 'D':
			sOut.AppendChar('-');
			break;
		default:
			Quit("PathSeq, invalid edge type %c", cType);
			}
		}
	}
Ejemplo n.º 8
0
// Rebuilds this diagonal list from an alignment path.
//
// The list is cleared, then every maximal run of consecutive 'M' edges whose
// length is at least params.g_uMinDiagLength is added as a diagonal. The
// start positions of a run are the 0-based positions of its first match
// (prefix length minus one).
void DiagList::FromPath(const PWPath &Path)
	{
	unsigned &g_uMinDiagLength = getMuscleContext()->params.g_uMinDiagLength;

	Clear();

	const unsigned uEdgeCount = Path.GetEdgeCount();
	unsigned uLength = 0;
// Initialized so that no indeterminate value is ever passed to Add. With a
// minimum length of zero the length test below passes even before any 'M'
// edge has set these.
	unsigned uStartPosA = 0;
	unsigned uStartPosB = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);

	// Typical cases
		if (Edge.cType == 'M')
			{
			if (0 == uLength)
				{
			// First match of a new run: remember where it starts
				uStartPosA = Edge.uPrefixLengthA - 1;
				uStartPosB = Edge.uPrefixLengthB - 1;
				}
			++uLength;
			}
		else
			{
		// A gap edge ends the current run
			if (uLength >= g_uMinDiagLength)
				Add(uStartPosA, uStartPosB, uLength);
			uLength = 0;
			}
		}

// Special case for last edge: flush a run that reaches the end of the path
	if (uLength >= g_uMinDiagLength)
		Add(uStartPosA, uStartPosB, uLength);
	}
Ejemplo n.º 9
0
    // Attempts to improve msaIn by re-aligning two groups of its sequences.
    //
    //   msaIn            - alignment to refine; overwritten with the
    //                      re-aligned result only when it is accepted
    //   tree             - used to map leaf node indexes to sequence ids
    //   Leaves1/uCount1  - leaf indexes of the first group
    //   Leaves2/uCount2  - leaf indexes of the second group
    //   ptrscoreBefore,
    //   ptrscoreAfter    - always written before returning (see below)
    //   bLockLeft/Right  - forwarded unchanged to AlignTwoMSAs
    //
    // Returns true when the re-alignment was accepted and copied into msaIn.
    //
    // Score outputs:
    //   - a sub-alignment is empty after gap-column removal, or the path is
    //     unchanged: both are set to 0;
    //   - DIFFOBJSCORE build: before = 0, after = score difference;
    //   - otherwise: the objective scores before and after re-alignment.
    bool TryRealign(MSA &msaIn, const Tree &tree, const unsigned Leaves1[],
        unsigned uCount1, const unsigned Leaves2[], unsigned uCount2,
        SCORE *ptrscoreBefore, SCORE *ptrscoreAfter,
        bool bLockLeft, bool bLockRight)
    {
#if	TRACE
        Log("TryRealign, msaIn=\n");
#endif

        MuscleContext *ctx = getMuscleContext();

        const unsigned uSeqCount = msaIn.GetSeqCount();

        // Scratch id arrays; every return path below must release them.
        unsigned *Ids1 = new unsigned[uSeqCount];
        unsigned *Ids2 = new unsigned[uSeqCount];

        LeafIndexesToIds(tree, Leaves1, uCount1, Ids1);
        LeafIndexesToIds(tree, Leaves2, uCount2, Ids2);

        MSA msa1;
        MSA msa2;

        MSASubsetByIds(msaIn, Ids1, uCount1, msa1);
        MSASubsetByIds(msaIn, Ids2, uCount2, msa2);

#if	DEBUG
        ValidateMuscleIds(msa1);
        ValidateMuscleIds(msa2);
#endif

        // Computing the objective score may be expensive for
        // large numbers of sequences. As a speed optimization,
        // we check whether the alignment changes. If it does
        // not change, there is no need to compute the objective
        // score. We test for the alignment changing by comparing
        // the Viterbi paths before and after re-aligning.
        PWPath pathBefore;
        pathBefore.FromMSAPair(msa1, msa2);

        DeleteGappedCols(msa1);
        DeleteGappedCols(msa2);

        if (0 == msa1.GetColCount() || 0 == msa2.GetColCount()) {
            // Nothing to align. Report zero scores, as the "no changes"
            // exit below does, so the caller never reads unwritten outputs.
            *ptrscoreBefore = 0;
            *ptrscoreAfter = 0;
            delete[] Ids1;
            delete[] Ids2;
            return false;
        }

        MSA msaRealigned;
        PWPath pathAfter;

        AlignTwoMSAs(msa1, msa2, msaRealigned, pathAfter, bLockLeft, bLockRight);

        bool bAnyChanges = !pathAfter.Equal(pathBefore);
        unsigned uDiffCount1;
        unsigned uDiffCount2;
        // Edge buffers are owned by the context, not by this function.
        unsigned* Edges1 = ctx->refinehoriz.Edges1;
        unsigned* Edges2 = ctx->refinehoriz.Edges2;
        DiffPaths(pathBefore, pathAfter, Edges1, &uDiffCount1, Edges2, &uDiffCount2);

#if	TRACE
        Log("TryRealign, msa1=\n");
        Log("\nmsa2=\n");
        Log("\nRealigned (changes %s)=\n", bAnyChanges ? "TRUE" : "FALSE");
#endif

        if (!bAnyChanges)
        {
            *ptrscoreBefore = 0;
            *ptrscoreAfter = 0;
            delete[] Ids1;
            delete[] Ids2;
            return false;
        }

        SetMSAWeightsMuscle(msaIn);
        SetMSAWeightsMuscle(msaRealigned);

#if	DIFFOBJSCORE
        const SCORE scoreDiff = DiffObjScore(msaIn, pathBefore, Edges1, uDiffCount1,
            msaRealigned, pathAfter, Edges2, uDiffCount2);
        bool bAccept = (scoreDiff > 0);
        *ptrscoreBefore = 0;
        *ptrscoreAfter = scoreDiff;
        //const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2);
        //const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2);
        //Log("Diff = %.3g %.3g\n", scoreDiff, scoreAfter - scoreBefore);
#else
        const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2);
        const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2);

        // Accept only a strict improvement.
        bool bAccept = (scoreAfter > scoreBefore);

#if	TRACE
        Log("Score %g -> %g Accept %s\n", scoreBefore, scoreAfter, bAccept ? "TRUE" : "FALSE");
#endif

        *ptrscoreBefore = scoreBefore;
        *ptrscoreAfter = scoreAfter;
#endif

        if (bAccept)
            msaIn.Copy(msaRealigned);
        delete[] Ids1;
        delete[] Ids2;
        return bAccept;
    }
Ejemplo n.º 10
0
// Progressive alignment of the sequences in v, guided by a rooted tree.
//
// The guide tree is visited with FirstDepthFirstNode/NextDepthFirstNode.
// For a leaf, the matching ProgNode receives a one-sequence MSA, its
// ClustalW weight and a profile built from that MSA. For an internal node,
// the profiles of its two children are aligned with AlignTwoProfs; the
// resulting path is stored on the parent and converted to a pair of
// estrings with PathToEstrings. Child MSAs are cleared after the merge,
// child profiles are kept.
//
// After the traversal the root alignment is written to a by MakeRootMSA
// (or MakeRootMSABrenner when g_bBrenner is set).
//
// Returns the array of 2*SeqCount - 1 ProgNodes allocated here with new[];
// it is not freed in this function. The Weights array is freed here.
ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a)
	{
	assert(GuideTree.IsRooted());

#if	TRACE
	Log("GuideTree:\n");
	GuideTree.LogMe();
#endif

	const unsigned uSeqCount = v.Length();
	const unsigned uNodeCount = 2*uSeqCount - 1;
// NOTE(review): uIterCount is not referenced anywhere below.
	const unsigned uIterCount = uSeqCount - 1;

	WEIGHT *Weights = new WEIGHT[uSeqCount];
	CalcClustalWWeights(GuideTree, Weights);

// One ProgNode per tree node, indexed by tree node index
	ProgNode *ProgNodes = new ProgNode[uNodeCount];

	unsigned uJoin = 0;
	unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
	SetProgressDesc("Align node");
	do
		{
		if (GuideTree.IsLeaf(uTreeNodeIndex))
			{
		// Leaf: wrap the single input sequence as an MSA and profile
			if (uTreeNodeIndex >= uNodeCount)
				Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount);
			ProgNode &Node = ProgNodes[uTreeNodeIndex];
			unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
			if (uId >= uSeqCount)
				Quit("Seq index out of range");
			const Seq &s = *(v[uId]);
			Node.m_MSA.FromSeq(s);
			Node.m_MSA.SetSeqId(0, uId);
			Node.m_uLength = Node.m_MSA.GetColCount();
			Node.m_Weight = Weights[uId];
		// TODO: Term gaps settable
			Node.m_Prof = ProfileFromMSA(Node.m_MSA);
		// Leaves have no estrings
			Node.m_EstringL = 0;
			Node.m_EstringR = 0;
#if	TRACE
			Log("Leaf id=%u\n", uId);
			Log("MSA=\n");
			Node.m_MSA.LogMe();
			Log("Profile (from MSA)=\n");
			ListProfile(Node.m_Prof, Node.m_uLength, &Node.m_MSA);
#endif
			}
		else
			{
		// Internal node: merge the profiles of the two children
			Progress(uJoin, uSeqCount - 1);
			++uJoin;

			const unsigned uMergeNodeIndex = uTreeNodeIndex;
			ProgNode &Parent = ProgNodes[uMergeNodeIndex];

			const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
			const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);

			if (g_bVerbose)
				{
				Log("Align: (");
				LogLeafNames(GuideTree, uLeft);
				Log(") (");
				LogLeafNames(GuideTree, uRight);
				Log(")\n");
				}

			ProgNode &Node1 = ProgNodes[uLeft];
			ProgNode &Node2 = ProgNodes[uRight];

#if	TRACE
			Log("AlignTwoMSAs:\n");
#endif
		// Writes the path, the merged profile and its length into Parent
			AlignTwoProfs(
			  Node1.m_Prof, Node1.m_uLength, Node1.m_Weight,
			  Node2.m_Prof, Node2.m_uLength, Node2.m_Weight,
			  Parent.m_Path,
			  &Parent.m_Prof, &Parent.m_uLength);
#if	TRACE_LENGTH_DELTA
			{
		// Log how much longer the merged path is than the longer child
			unsigned L = Node1.m_uLength;
			unsigned R = Node2.m_uLength;
			unsigned P = Parent.m_Path.GetEdgeCount();
			unsigned Max = L > R ? L : R;
			unsigned d = P - Max;
			Log("LD%u;%u;%u;%u\n", L, R, P, d);
			}
#endif
			PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR);

			Parent.m_Weight = Node1.m_Weight + Node2.m_Weight;

#if	VALIDATE
			{
		// Cross-check: align the child MSAs directly and compare the
		// resulting profile with the one produced by AlignTwoProfs
#if	TRACE
			Log("AlignTwoMSAs:\n");
#endif
			PWPath TmpPath;
			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, TmpPath);
			ProfPos *P1 = ProfileFromMSA(Node1.m_MSA, true);
			ProfPos *P2 = ProfileFromMSA(Node2.m_MSA, true);
			unsigned uLength = Parent.m_MSA.GetColCount();
			ProfPos *TmpProf = ProfileFromMSA(Parent.m_MSA, true);

#if	TRACE
			Log("Node1 MSA=\n");
			Node1.m_MSA.LogMe();

			Log("Node1 prof=\n");
			ListProfile(Node1.m_Prof, Node1.m_MSA.GetColCount(), &Node1.m_MSA);
			Log("Node1 prof (from MSA)=\n");
			ListProfile(P1, Node1.m_MSA.GetColCount(), &Node1.m_MSA);

			AssertProfsEq(Node1.m_Prof, Node1.m_uLength, P1, Node1.m_MSA.GetColCount());

			Log("Node2 prof=\n");
			ListProfile(Node2.m_Prof, Node2.m_MSA.GetColCount(), &Node2.m_MSA);

			Log("Node2 MSA=\n");
			Node2.m_MSA.LogMe();

			Log("Node2 prof (from MSA)=\n");
			ListProfile(P2, Node2.m_MSA.GetColCount(), &Node2.m_MSA);

			AssertProfsEq(Node2.m_Prof, Node2.m_uLength, P2, Node2.m_MSA.GetColCount());

			TmpPath.AssertEqual(Parent.m_Path);

			Log("Parent MSA=\n");
			Parent.m_MSA.LogMe();

			Log("Parent prof=\n");
			ListProfile(Parent.m_Prof, Parent.m_uLength, &Parent.m_MSA);

			Log("Parent prof (from MSA)=\n");
			ListProfile(TmpProf, Parent.m_MSA.GetColCount(), &Parent.m_MSA);

#endif	// TRACE
			AssertProfsEq(Parent.m_Prof, Parent.m_uLength,
			  TmpProf, Parent.m_MSA.GetColCount());
			delete[] P1;
			delete[] P2;
			delete[] TmpProf;
			}
#endif	// VALIDATE

		// The child MSAs are no longer needed once they are merged
			Node1.m_MSA.Clear();
			Node2.m_MSA.Clear();

		// Don't delete profiles, may need them for tree refinement.
			//delete[] Node1.m_Prof;
			//delete[] Node2.m_Prof;
			//Node1.m_Prof = 0;
			//Node2.m_Prof = 0;
			}
		uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);
	ProgressStepsDone();

// Build the final alignment a from the per-node results
	if (g_bBrenner)
		MakeRootMSABrenner((SeqVect &) v, GuideTree, ProgNodes, a);
	else
		MakeRootMSA(v, GuideTree, ProgNodes, a);

#if	VALIDATE
	{
	unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
	AssertMSAEq(a, RootProgNode.m_MSA);
	}
#endif

	delete[] Weights;
	return ProgNodes;
	}
Ejemplo n.º 11
0
// Converts a pair-wise path into two "estrings", one per sequence.
//
// As built here, an estring is a zero-terminated array of shorts in which
// each entry is a run length: a positive entry counts consecutive edges
// that consume a letter of that sequence, a negative entry counts
// consecutive edges that put a gap into it.
//   - For A: 'M' and 'D' edges are positive, 'I' edges are negative.
//   - For B: 'M' and 'I' edges are positive, 'D' edges are negative.
//
// Both output arrays are allocated here with new[] and returned through
// ptresA / ptresB; this function does not free them. An empty path yields
// two arrays holding only the terminating 0.
//
// c2(x, y) combines two edge types into a single value usable as a case
// label; it is defined outside this function.
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
	{
// First pass to determine size of estrings esA and esB
	const unsigned uEdgeCount = Path.GetEdgeCount();
	if (0 == uEdgeCount)
		{
		short *esA = new short[1];
		short *esB = new short[1];
		esA[0] = 0;
		esB[0] = 0;
		*ptresA = esA;
		*ptresB = esB;
		return;
		}

// Each estring starts with one entry; a new entry is needed whenever the
// sign of the run changes for that sequence.
	unsigned iLengthA = 1;
	unsigned iLengthB = 1;
	const char cFirstEdgeType = Path.GetEdge(0).cType;
	char cPrevEdgeType = cFirstEdgeType;
	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cEdgeType = Edge.cType;

		switch (c2(cPrevEdgeType, cEdgeType))
			{
	// Same edge type: no sign change for either sequence
		case c2('M', 'M'):
		case c2('D', 'D'):
		case c2('I', 'I'):
			break;

	// M<->D: sign changes for B only
		case c2('D', 'M'):
		case c2('M', 'D'):
			++iLengthB;
			break;

	// M<->I: sign changes for A only
		case c2('I', 'M'):
		case c2('M', 'I'):
			++iLengthA;
			break;

	// I<->D: sign changes for both
		case c2('I', 'D'):
		case c2('D', 'I'):
			++iLengthB;
			++iLengthA;
			break;

		default:
			assert(false);
			}
		cPrevEdgeType = cEdgeType;
		}

// Pass2 for seq A
	{
// One extra slot for the terminating 0
	short *esA = new short[iLengthA+1];
	unsigned iA = 0;
	switch (Path.GetEdge(0).cType)
		{
	case 'M':
	case 'D':
		esA[0] = 1;
		break;

	case 'I':
		esA[0] = -1;
		break;

	default:
		assert(false);
		}

	char cPrevEdgeType = cFirstEdgeType;
	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cEdgeType = Edge.cType;

		switch (c2(cPrevEdgeType, cEdgeType))
			{
	// Still consuming letters of A: extend the positive run
		case c2('M', 'M'):
		case c2('D', 'D'):
		case c2('D', 'M'):
		case c2('M', 'D'):
			++(esA[iA]);
			break;

	// Gap run in A ends: start a new positive run
		case c2('I', 'D'):
		case c2('I', 'M'):
			++iA;
			esA[iA] = 1;
			break;

	// Letter run in A ends: start a new negative (gap) run
		case c2('M', 'I'):
		case c2('D', 'I'):
			++iA;
			esA[iA] = -1;
			break;

	// Still in a gap run in A: extend the negative run
		case c2('I', 'I'):
			--(esA[iA]);
			break;

		default:
			assert(false);
			}

		cPrevEdgeType = cEdgeType;
		}
// The number of runs written must match the size computed in the first pass
	assert(iA == iLengthA - 1);
	esA[iLengthA] = 0;
	*ptresA = esA;
	}

	{
// Pass2 for seq B
// Mirror image of the pass for A, with the roles of 'I' and 'D' exchanged
	short *esB = new short[iLengthB+1];
	unsigned iB = 0;
	switch (Path.GetEdge(0).cType)
		{
	case 'M':
	case 'I':
		esB[0] = 1;
		break;

	case 'D':
		esB[0] = -1;
		break;

	default:
		assert(false);
		}

	char cPrevEdgeType = cFirstEdgeType;
	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cEdgeType = Edge.cType;

		switch (c2(cPrevEdgeType, cEdgeType))
			{
	// Still consuming letters of B: extend the positive run
		case c2('M', 'M'):
		case c2('I', 'I'):
		case c2('I', 'M'):
		case c2('M', 'I'):
			++(esB[iB]);
			break;

	// Gap run in B ends: start a new positive run
		case c2('D', 'I'):
		case c2('D', 'M'):
			++iB;
			esB[iB] = 1;
			break;

	// Letter run in B ends: start a new negative (gap) run
		case c2('M', 'D'):
		case c2('I', 'D'):
			++iB;
			esB[iB] = -1;
			break;

	// Still in a gap run in B: extend the negative run
		case c2('D', 'D'):
			--(esB[iB]);
			break;

		default:
			assert(false);
			}

		cPrevEdgeType = cEdgeType;
		}
	assert(iB == iLengthB - 1);
	esB[iLengthB] = 0;
	*ptresB = esB;
	}

#if	DEBUG
	{
// Consistency checks: the symbol/indel totals of each estring must agree
// with the last edge of the path and with the edge count, and converting
// the estrings back must reproduce the original path.
	const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1);
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(*ptresA, &uSymbols, &uIndels);
	assert(uSymbols == LastEdge.uPrefixLengthA);
	assert(uSymbols + uIndels == uEdgeCount);

	EstringCounts(*ptresB, &uSymbols, &uIndels);
	assert(uSymbols == LastEdge.uPrefixLengthB);
	assert(uSymbols + uIndels == uEdgeCount);

	PWPath TmpPath;
	EstringsToPath(*ptresA, *ptresB, TmpPath);
	TmpPath.AssertEqual(Path);
	}
#endif
	}
Ejemplo n.º 12
0
// Combines msaA and msaB into msaCombined following Path.
//
// msaCombined is cleared and sized to hold the sequences of A followed by
// those of B; names and ids are copied across. For each edge, columns of
// A and/or B that lie before the edge's column and have not been emitted
// yet are first added through AppendUnalignedTerminals; then the edge
// itself is emitted with AppendMatch ('M'), AppendDelete ('D') or
// AppendInsert ('I'). Columns remaining after the last edge are added with
// a final AppendUnalignedTerminals call.
//
// NOTE(review): the asserts rely on the Append* helpers advancing the
// column indexes they are given; those helpers are defined elsewhere --
// confirm.
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
  MSA &msaCombined)
	{
	msaCombined.Clear();

#if	TRACE
	Log("AlignTwoMSAsGivenPathSW\n");
	Log("Template A:\n");
	msaA.LogMe();
	Log("Template B:\n");
	msaB.LogMe();
#endif

	const unsigned uTotalColsA = msaA.GetColCount();
	const unsigned uTotalColsB = msaB.GetColCount();

	const unsigned uSeqCountA = msaA.GetSeqCount();
	const unsigned uSeqCountB = msaB.GetSeqCount();

	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);

// Sequences of A occupy the first rows of the combined MSA...
	for (unsigned uRowA = 0; uRowA < uSeqCountA; ++uRowA)
		{
		msaCombined.SetSeqName(uRowA, msaA.GetSeqName(uRowA));
		msaCombined.SetSeqId(uRowA, msaA.GetSeqId(uRowA));
		}

// ...followed by the sequences of B
	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB)
		{
		const unsigned uTargetRow = uSeqCountA + uRowB;
		msaCombined.SetSeqName(uTargetRow, msaB.GetSeqName(uRowB));
		msaCombined.SetSeqId(uTargetRow, msaB.GetSeqId(uRowB));
		}

	unsigned uColIndexA = 0;
	unsigned uColIndexB = 0;
	unsigned uColIndexCombined = 0;
	const unsigned uEdgeCount = Path.GetEdgeCount();
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
#if	TRACE
		Log("\nEdge %u %c%u.%u\n",
		  uEdgeIndex,
		  Edge.cType,
		  Edge.uPrefixLengthA,
		  Edge.uPrefixLengthB);
#endif
		const char cType = Edge.cType;
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;

	// Columns of A in front of this edge that have not been emitted yet
		unsigned uPendingColsA = 0;
		if (uPrefixLengthA > 0 && uPrefixLengthA - 1 > uColIndexA)
			uPendingColsA = (uPrefixLengthA - 1) - uColIndexA;

	// Same for B
		unsigned uPendingColsB = 0;
		if (uPrefixLengthB > 0 && uPrefixLengthB - 1 > uColIndexB)
			uPendingColsB = (uPrefixLengthB - 1) - uColIndexB;

		AppendUnalignedTerminals(msaA, uColIndexA, uPendingColsA, msaB, uColIndexB, uPendingColsB,
		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

		if ('M' == cType)
			{
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			assert(uColIndexA == uPrefixLengthA - 1);
			assert(uColIndexB == uPrefixLengthB - 1);
			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
			  msaCombined, uColIndexCombined);
			}
		else if ('D' == cType)
			{
			assert(uPrefixLengthA > 0);
			assert(uColIndexA == uPrefixLengthA - 1);
			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			}
		else if ('I' == cType)
			{
			assert(uPrefixLengthB > 0);
			assert(uColIndexB == uPrefixLengthB - 1);
			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			}
		else
			assert(false);
		}

// Whatever is left of A and B after the last edge
	unsigned uTailColsA = uTotalColsA - uColIndexA;
	unsigned uTailColsB = uTotalColsB - uColIndexB;

	AppendUnalignedTerminals(msaA, uColIndexA, uTailColsA, msaB, uColIndexB,
	  uTailColsB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
	}
Ejemplo n.º 13
0
// Local (Smith-Waterman style) alignment of two profiles.
//
//   PA, uLengthA - first profile and its number of positions (> 0)
//   PB, uLengthB - second profile and its number of positions (> 0)
//   Path         - filled by TraceBackSW, starting from the best M cell
//
// Three DP matrices (M, D, I) of (uLengthA+1) x (uLengthB+1) cells are
// allocated and released here. The best predecessor score of an M cell is
// clamped to 0 before the match score is added, and the highest M cell seen
// is where trace-back starts. Returns that highest score.
//
// DPM/DPD/DPI are accessor macros defined outside this function;
// presumably they index DPM_/DPD_/DPI_ using uPrefixCountA, so those names
// must not be changed -- confirm against the macro definitions.
SCORE SW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	assert(uLengthB > 0 && uLengthA > 0);

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

// Allocate DP matrices.
// Widen before multiplying: the product of two unsigned values would
// otherwise be computed modulo 2^N of unsigned and could under-allocate.
	const size_t LM = static_cast<size_t>(uPrefixCountA)*uPrefixCountB;
	SCORE *DPM_ = new SCORE[LM];
	SCORE *DPD_ = new SCORE[LM];
	SCORE *DPI_ = new SCORE[LM];

	DPM(0, 0) = 0;
	DPD(0, 0) = MINUS_INFINITY;
	DPI(0, 0) = MINUS_INFINITY;

	DPM(1, 0) = MINUS_INFINITY;
	DPD(1, 0) = MINUS_INFINITY;
	DPI(1, 0) = MINUS_INFINITY;

	DPM(0, 1) = MINUS_INFINITY;
	DPD(0, 1) = MINUS_INFINITY;
	DPI(0, 1) = MINUS_INFINITY;

// Empty prefix of B is special case
	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
		{
	// M=LetterA+LetterB, impossible with empty prefix
		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;

	// D=LetterA+GapB, never optimal in local alignment with gap penalties
		DPD(uPrefixLengthA, 0) = MINUS_INFINITY;

	// I=GapA+LetterB, impossible with empty prefix
		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
		}

// Empty prefix of A is special case
	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
	// M=LetterA+LetterB, impossible with empty prefix
		DPM(0, uPrefixLengthB) = MINUS_INFINITY;

	// D=LetterA+GapB, impossible with empty prefix
		DPD(0, uPrefixLengthB) = MINUS_INFINITY;

	// I=GapA+LetterB, never optimal in local alignment with gap penalties
		DPI(0, uPrefixLengthB) = MINUS_INFINITY;
		}

// Best M cell seen so far; trace-back starts there
	SCORE scoreMax = MINUS_INFINITY;
	unsigned uPrefixLengthAMax = uInsane;
	unsigned uPrefixLengthBMax = uInsane;

// ============
// Main DP loop
// ============
// The gap-close scores lag one position behind: they belong to the
// previous position of the respective profile.
	SCORE scoreGapCloseB = MINUS_INFINITY;
	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
		const ProfPos &PPB = PB[uPrefixLengthB - 1];

		SCORE scoreGapCloseA = MINUS_INFINITY;
		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
			{
			const ProfPos &PPA = PA[uPrefixLengthA - 1];

			{
		// Match M=LetterA+LetterB
			SCORE scoreLL = ScoreProfPos2(PPA, PPB);

			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;

			SCORE scoreBest;
			if (scoreMM >= scoreDM && scoreMM >= scoreIM)
				scoreBest = scoreMM;
			else if (scoreDM >= scoreMM && scoreDM >= scoreIM)
				scoreBest = scoreDM;
			else 
				{
				assert(scoreIM >= scoreMM && scoreIM >= scoreDM);
				scoreBest = scoreIM;
				}
		// Local alignment: a negative predecessor means "start here"
			if (scoreBest < 0)
				scoreBest = 0;
			scoreBest += scoreLL;
			if (scoreBest > scoreMax)
				{
				scoreMax = scoreBest;
				uPrefixLengthAMax = uPrefixLengthA;
				uPrefixLengthBMax = uPrefixLengthB;
				}
			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

			{
		// Delete D=LetterA+GapB
			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
			  PA[uPrefixLengthA-1].m_scoreGapOpen;
			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);

			SCORE scoreBest;
			if (scoreMD >= scoreDD)
				scoreBest = scoreMD;
			else
				{
				assert(scoreDD >= scoreMD);
				scoreBest = scoreDD;
				}
			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

		// Insert I=GapA+LetterB
			{
			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
			  PB[uPrefixLengthB - 1].m_scoreGapOpen;
			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);

			SCORE scoreBest;
			if (scoreMI >= scoreII)
				scoreBest = scoreMI;
			else 
				{
				assert(scoreII > scoreMI);
				scoreBest = scoreII;
				}
			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

			scoreGapCloseA = PPA.m_scoreGapClose;
			}
		scoreGapCloseB = PPB.m_scoreGapClose;
		}

#if TRACE
// The loop counters are out of scope here; dump the full matrices.
// NOTE(review): assumes ListDP takes the matrix dimensions as its last two
// arguments -- confirm against its definition.
	Log("DPM:\n");
	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("DPD:\n");
	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("DPI:\n");
	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
#endif

	assert(scoreMax == DPM(uPrefixLengthAMax, uPrefixLengthBMax));
	TraceBackSW(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, 
	  uPrefixLengthAMax, uPrefixLengthBMax, Path);

#if	TRACE
	SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
	Path.LogMe();
	Log("Score = %s Path = %s\n", LocalScoreToStr(scoreMax), LocalScoreToStr(scorePath));
#endif

	delete[] DPM_;
	delete[] DPD_;
	delete[] DPI_;

	return scoreMax;
	}
Ejemplo n.º 14
0
// Reconstructs an alignment path from filled DP and trace-back matrices.
//
// Starts in cell (uLengthA, uLengthB) in whichever of the M, D, I matrices
// holds the highest score there (ties favour M, then D, because the
// comparisons are strict). It then repeatedly prepends an edge for the
// current cell to Path and follows the trace-back character stored for that
// cell, until both prefix lengths are zero. Returns the starting score.
//
// DPM/DPD/DPI and TBM/TBD/TBI are accessor macros defined outside this
// function; presumably they use the local uPrefixCountA, so that name must
// stay -- confirm against the macro definitions.
static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
                              const char *TBM_, const char *TBD_, const char *TBI_,
                              unsigned uLengthA, unsigned uLengthB, PWPath &Path)
{
    const unsigned uPrefixCountA = uLengthA + 1;

    // Choose the best final state.
    SCORE scoreMax = DPM(uLengthA, uLengthB);
    char cState = 'M';

    const SCORE scoreFinalD = DPD(uLengthA, uLengthB);
    if (scoreFinalD > scoreMax)
    {
        scoreMax = scoreFinalD;
        cState = 'D';
    }

    const SCORE scoreFinalI = DPI(uLengthA, uLengthB);
    if (scoreFinalI > scoreMax)
    {
        scoreMax = scoreFinalI;
        cState = 'I';
    }

    // Walk back towards the origin, building the path front-to-back.
    unsigned uPosA = uLengthA;
    unsigned uPosB = uLengthB;
    while (!(0 == uPosA && 0 == uPosB))
    {
        PWEdge Step;
        Step.cType = cState;
        Step.uPrefixLengthA = uPosA;
        Step.uPrefixLengthB = uPosB;
        Path.PrependEdge(Step);

#if TRACE
        Log("PLA=%u PLB=%u Edge=%c\n", uPosA, uPosB, cState);
#endif
        if ('M' == cState)
        {
            assert(uPosA > 0 && uPosB > 0);
            cState = TBM(uPosA, uPosB);
            --uPosA;
            --uPosB;
        }
        else if ('D' == cState)
        {
            assert(uPosA > 0);
            cState = TBD(uPosA, uPosB);
            --uPosA;
        }
        else if ('I' == cState)
        {
            assert(uPosB > 0);
            cState = TBI(uPosA, uPosB);
            --uPosB;
        }
        else
        {
            Quit("Invalid edge PLA=%u PLB=%u %c", uPosA, uPosB, cState);
        }
    }
#if	TRACE
    Path.LogMe();
#endif
    return scoreMax;
}
Ejemplo n.º 15
0
// Global alignment of two profiles, accelerated with diagonals.
//
// Diagonals are found with FindDiags (amino alphabet) or FindDiagsNuc
// (DNA/RNA), sorted, pruned with DeleteIncompatible and merged. The result
// is converted to a list of DP regions. Each diagonal region becomes a run
// of match edges; each rectangular region is aligned with
// GlobalAlignNoDiags and its path is shifted to the rectangle's start.
// The region paths are appended to Path in order.
//
// Path is only appended to here; it is not cleared in this function.
// Always returns 0. Any other alphabet, or an unknown region type,
// terminates the program via Quit.
//
// Side effects: g_dDPAreaWithoutDiags and g_dDPAreaWithDiags accumulate the
// full DP area and the area actually aligned by DP.
SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	LIST_DIAGS
	TICKS t1 = GetClockTicks();
#endif

	DiagList DL;

	if (ALPHA_Amino == g_Alpha)
		FindDiags(PA, uLengthA, PB, uLengthB, DL);
	else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
		FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL);
	else
		Quit("GlobalAlignDiags: bad alpha");

#if	TRACE
	Log("GlobalAlignDiags, diag list:\n");
	DL.LogMe();
#endif

	DL.Sort();
	DL.DeleteIncompatible();

#if	TRACE
	Log("After DeleteIncompatible:\n");
	DL.LogMe();
#endif

	MergeDiags(DL);

#if	TRACE
	Log("After MergeDiags:\n");
	DL.LogMe();
#endif

	DPRegionList RL;
	DiagListToDPRegionList(DL, RL, uLengthA, uLengthB);

#if	TRACE
	Log("RegionList:\n");
	RL.LogMe();
#endif

#if	LIST_DIAGS
	{
	TICKS t2 = GetClockTicks();
	unsigned uArea = RL.GetDPArea();
	Log("ticks=%ld\n", (long) (t2 - t1));
	Log("area=%u\n", uArea);
	}
#endif

// Areas are computed in double: multiplying the two unsigned lengths first
// would wrap around for long sequences before the conversion happens.
	const double dFullDPArea = static_cast<double>(uLengthA)*uLengthB;
	g_dDPAreaWithoutDiags += dFullDPArea;

	double dDPAreaWithDiags = 0.0;
	const unsigned uRegionCount = RL.GetCount();
	for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex)
		{
		const DPRegion &r = RL.Get(uRegionIndex);

		PWPath RegPath;
		if (DPREGIONTYPE_Diag == r.m_Type)
			{
		// Diagonal: no DP needed, just a run of matches
			DiagToPath(r.m_Diag, RegPath);
#if	TRACE_PATH
			Log("DiagToPath, path=\n");
			RegPath.LogMe();
#endif
			}
		else if (DPREGIONTYPE_Rect == r.m_Type)
			{
			const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA;
			const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB;
			const unsigned uRegLengthA = r.m_Rect.m_uLengthA;
			const unsigned uRegLengthB = r.m_Rect.m_uLengthB;
			const ProfPos *RegPA = PA + uRegStartPosA;
			const ProfPos *RegPB = PB + uRegStartPosB;

			dDPAreaWithDiags += static_cast<double>(uRegLengthA)*uRegLengthB;
			GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath);
#if	TRACE_PATH
			Log("GlobalAlignNoDiags RegPath=\n");
			RegPath.LogMe();
#endif
		// The sub-alignment is relative to the rectangle; make it absolute
			OffsetPath(RegPath, uRegStartPosA, uRegStartPosB);
#if	TRACE_PATH
			Log("After offset path, RegPath=\n");
			RegPath.LogMe();
#endif
			}
		else
			Quit("GlobalAlignDiags, Invalid region type %u", r.m_Type);

		AppendRegPath(Path, RegPath);
#if	TRACE_PATH
		Log("After AppendPath, path=");
		Path.LogMe();
#endif
		}

#if	TRACE
	{
	double dDPAreaWithoutDiags = dFullDPArea;
	Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n",
	  dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0);
	}
#endif
	g_dDPAreaWithDiags += dDPAreaWithDiags;
	return 0;
	}
Ejemplo n.º 16
0
SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	assert(uLengthB > 0 && uLengthA > 0);

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

// Allocate DP matrices
	const size_t LM = uPrefixCountA*uPrefixCountB;
	SCORE *DPL_ = new SCORE[LM];
	SCORE *DPM_ = new SCORE[LM];
	SCORE *DPD_ = new SCORE[LM];
	SCORE *DPE_ = new SCORE[LM];
	SCORE *DPI_ = new SCORE[LM];
	SCORE *DPJ_ = new SCORE[LM];

	char *TBM_ = new char[LM];
	char *TBD_ = new char[LM];
	char *TBE_ = new char[LM];
	char *TBI_ = new char[LM];
	char *TBJ_ = new char[LM];

	memset(TBM_, '?', LM);
	memset(TBD_, '?', LM);
	memset(TBE_, '?', LM);
	memset(TBI_, '?', LM);
	memset(TBJ_, '?', LM);

	DPM(0, 0) = 0;
	DPD(0, 0) = MINUS_INFINITY;
	DPE(0, 0) = MINUS_INFINITY;
	DPI(0, 0) = MINUS_INFINITY;
	DPJ(0, 0) = MINUS_INFINITY;

	DPM(1, 0) = MINUS_INFINITY;
	DPD(1, 0) = PA[0].m_scoreGapOpen;
	DPE(1, 0) = PA[0].m_scoreGapOpen2;
	TBD(1, 0) = 'D';
	TBE(1, 0) = 'E';
	DPI(1, 0) = MINUS_INFINITY;
	DPJ(1, 0) = MINUS_INFINITY;

	DPM(0, 1) = MINUS_INFINITY;
	DPD(0, 1) = MINUS_INFINITY;
	DPE(0, 1) = MINUS_INFINITY;
	DPI(0, 1) = PB[0].m_scoreGapOpen;
	DPJ(0, 1) = PB[0].m_scoreGapOpen2;
	TBI(0, 1) = 'I';
	TBJ(0, 1) = 'J';

// Empty prefix of B is special case
	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
		{
		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;

		DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend.get();
		DPE(uPrefixLengthA, 0) = DPE(uPrefixLengthA - 1, 0) + g_scoreGapExtend2.get();

		TBD(uPrefixLengthA, 0) = 'D';
		TBE(uPrefixLengthA, 0) = 'E';

		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
		DPJ(uPrefixLengthA, 0) = MINUS_INFINITY;
		}

// Empty prefix of A is special case
	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
		DPM(0, uPrefixLengthB) = MINUS_INFINITY;

		DPD(0, uPrefixLengthB) = MINUS_INFINITY;
		DPE(0, uPrefixLengthB) = MINUS_INFINITY;

		DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend.get();
		DPJ(0, uPrefixLengthB) = DPJ(0, uPrefixLengthB - 1) + g_scoreGapExtend2.get();

		TBI(0, uPrefixLengthB) = 'I';
		TBJ(0, uPrefixLengthB) = 'J';
		}

// Special case to agree with NWFast, no D-I transitions so...
	DPD(uLengthA, 0) = MINUS_INFINITY;
	DPE(uLengthA, 0) = MINUS_INFINITY;
//	DPI(0, uLengthB) = MINUS_INFINITY;
//	DPJ(0, uLengthB) = MINUS_INFINITY;

// ============
// Main DP loop
// ============
	SCORE scoreGapCloseB = MINUS_INFINITY;
	SCORE scoreGapClose2B = MINUS_INFINITY;
	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
		const ProfPos &PPB = PB[uPrefixLengthB - 1];

		SCORE scoreGapCloseA = MINUS_INFINITY;
		SCORE scoreGapClose2A = MINUS_INFINITY;
		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
			{
			const ProfPos &PPA = PA[uPrefixLengthA - 1];

			{
		// Match M=LetterA+LetterB
			SCORE scoreLL = ScoreProfPos2(PPA, PPB);
			DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;

			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
			SCORE scoreEM = DPE(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2A;
			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
			SCORE scoreJM = DPJ(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2B;

			SCORE scoreBest;
			if (scoreMM >= scoreDM && scoreMM >= scoreEM && scoreMM >= scoreIM && scoreMM >= scoreJM)
				{
				scoreBest = scoreMM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else if (scoreDM >= scoreMM && scoreDM >= scoreEM && scoreDM >= scoreIM && scoreDM >= scoreJM)
				{
				scoreBest = scoreDM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
				}
			else if (scoreEM >= scoreMM && scoreEM >= scoreDM && scoreEM >= scoreIM && scoreEM >= scoreJM)
				{
				scoreBest = scoreEM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'E';
				}
			else if (scoreIM >= scoreMM && scoreIM >= scoreDM && scoreIM >= scoreEM && scoreIM >= scoreJM)
				{
				scoreBest = scoreIM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
				}
			else
				{
				assert(scoreJM >= scoreMM && scoreJM >= scoreDM && scoreJM >= scoreEM && scoreJM >= scoreIM);
				scoreBest = scoreJM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'J';
				}
			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
			}

			{
		// Delete D=LetterA+GapB
			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
			  PA[uPrefixLengthA-1].m_scoreGapOpen;
			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend.get();

			SCORE scoreBest;
			if (scoreMD >= scoreDD)
				{
				scoreBest = scoreMD;
				TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else
				{
				assert(scoreDD >= scoreMD);
				scoreBest = scoreDD;
				TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
				}
			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

			{
		// Delete E=LetterA+GapB
			SCORE scoreME = DPM(uPrefixLengthA-1, uPrefixLengthB) +
			  PA[uPrefixLengthA-1].m_scoreGapOpen2;
			SCORE scoreEE = DPE(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend2.get();

			SCORE scoreBest;
			if (scoreME >= scoreEE)
				{
				scoreBest = scoreME;
				TBE(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else
				{
				assert(scoreEE >= scoreME);
				scoreBest = scoreEE;
				TBE(uPrefixLengthA, uPrefixLengthB) = 'E';
				}
			DPE(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

		// Insert I=GapA+LetterB
			{
			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
			  PB[uPrefixLengthB - 1].m_scoreGapOpen;
			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend.get();

			SCORE scoreBest;
			if (scoreMI >= scoreII)
				{
				scoreBest = scoreMI;
				TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else 
				{
				assert(scoreII > scoreMI);
				scoreBest = scoreII;
				TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
				}
			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

		// Insert J=GapA+LetterB
			{
			SCORE scoreMJ = DPM(uPrefixLengthA, uPrefixLengthB-1) +
			  PB[uPrefixLengthB - 1].m_scoreGapOpen2;
			SCORE scoreJJ = DPJ(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend2.get();

			SCORE scoreBest;
			if (scoreMJ >= scoreJJ)
				{
				scoreBest = scoreMJ;
				TBJ(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else 
				{
				assert(scoreJJ > scoreMJ);
				scoreBest = scoreJJ;
				TBJ(uPrefixLengthA, uPrefixLengthB) = 'J';
				}
			DPJ(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

			scoreGapCloseA = PPA.m_scoreGapClose;
			scoreGapClose2A = PPA.m_scoreGapClose2;
			}
		scoreGapCloseB = PPB.m_scoreGapClose;
		scoreGapClose2B = PPB.m_scoreGapClose2;
		}

#if TRACE
	Log("\n");
	Log("DA Simple DPL:\n");
	ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPM:\n");
	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPD:\n");
	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPE:\n");
	ListDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPI:\n");
	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPJ:\n");
	ListDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBM:\n");
	ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBD:\n");
	ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBE:\n");
	ListTB(TBE_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBI:\n");
	ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBJ:\n");
	ListTB(TBJ_, PA, PB, uPrefixCountA, uPrefixCountB);
#endif

// Trace-back
// ==========
	Path.Clear();

// Find last edge
	SCORE M = DPM(uLengthA, uLengthB);
	SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;
	SCORE E = DPE(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose2;
	SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;
	SCORE J = DPJ(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose2;
	char cEdgeType = '?';

	SCORE BestScore = M;
	cEdgeType = 'M';
	if (D > BestScore)
		{
		cEdgeType = 'D';
		BestScore = D;
		}
	if (E > BestScore)
		{
		cEdgeType = 'E';
		BestScore = E;
		}
	if (I > BestScore)
		{
		cEdgeType = 'I';
		BestScore = I;
		}
	if (J > BestScore)
		{
		cEdgeType = 'J';
		BestScore = J;
		}

#if	TRACE
	Log("DA Simple: MAB=%.4g DAB=%.4g EAB=%.4g IAB=%.4g JAB=%.4g best=%c\n",
	  M, D, E, I, J, cEdgeType);
#endif

	unsigned PLA = uLengthA;
	unsigned PLB = uLengthB;
	for (;;)
		{
		PWEdge Edge;
		Edge.cType = XlatEdgeType(cEdgeType);
		Edge.uPrefixLengthA = PLA;
		Edge.uPrefixLengthB = PLB;
#if	TRACE
		Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);
#endif
		Path.PrependEdge(Edge);

		switch (cEdgeType)
			{
		case 'M':
			assert(PLA > 0);
			assert(PLB > 0);
			cEdgeType = TBM(PLA, PLB);
			--PLA;
			--PLB;
			break;

		case 'D':
			assert(PLA > 0);
			cEdgeType = TBD(PLA, PLB);
			--PLA;
			break;

		case 'E':
			assert(PLA > 0);
			cEdgeType = TBE(PLA, PLB);
			--PLA;
			break;

		case 'I':
			assert(PLB > 0);
			cEdgeType = TBI(PLA, PLB);
			--PLB;
			break;
		
		case 'J':
			assert(PLB > 0);
			cEdgeType = TBJ(PLA, PLB);
			--PLB;
			break;

		default:
			Quit("Invalid edge %c", cEdgeType);
			}
		if (0 == PLA && 0 == PLB)
			break;
		}
	Path.Validate();

//	SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path);

#if	TRACE
	SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
	Path.LogMe();
	Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath));
#endif

	if (g_bKeepSimpleDP.get())
		{
		g_DPM.get() = DPM_;
		g_DPD.get() = DPD_;
		g_DPE.get() = DPE_;
		g_DPI.get() = DPI_;
		g_DPJ.get() = DPJ_;

		g_TBM.get() = TBM_;
		g_TBD.get() = TBD_;
		g_TBE.get() = TBE_;
		g_TBI.get() = TBI_;
		g_TBJ.get() = TBJ_;
		}
	else
		{
		delete[] DPM_;
		delete[] DPD_;
		delete[] DPE_;
		delete[] DPI_;
		delete[] DPJ_;

		delete[] TBM_;
		delete[] TBD_;
		delete[] TBE_;
		delete[] TBI_;
		delete[] TBJ_;
		}

	return BestScore;
	}
Ejemplo n.º 17
0
void EstringsToPath(const short esA[], const short esB[], PWPath &Path)
	{
	Path.Clear();
	unsigned iA = 0;
	unsigned iB = 0;
	int nA = esA[iA++];
	int nB = esB[iB++];
	unsigned uPrefixLengthA = 0;
	unsigned uPrefixLengthB = 0;
	for (;;)
		{
		char cType;
		if (nA > 0)
			{
			if (nB > 0)
				{
				cType = 'M';
				--nA;
				--nB;
				}
			else if (nB < 0)
				{
				cType = 'D';
				--nA;
				++nB;
				}
			else
				assert(false);
			}
		else if (nA < 0)
			{
			if (nB > 0)
				{
				cType = 'I';
				++nA;
				--nB;
				}
			else
				assert(false);
			}
		else
			assert(false);

		switch (cType)
			{
		case 'M':
			++uPrefixLengthA;
			++uPrefixLengthB;
			break;
		case 'D':
			++uPrefixLengthA;
			break;
		case 'I':
			++uPrefixLengthB;
			break;
			}

		PWEdge Edge;
		Edge.cType = cType;
		Edge.uPrefixLengthA = uPrefixLengthA;
		Edge.uPrefixLengthB = uPrefixLengthB;
		Path.AppendEdge(Edge);

		if (nA == 0)
			{
			if (0 == esA[iA])
				{
				assert(0 == esB[iB]);
				break;
				}
			nA = esA[iA++];
			}
		if (nB == 0)
			nB = esB[iB++];
		}
	}
Ejemplo n.º 18
0
// TraceBack: rebuild the alignment path from filled M/D/I DP matrices.
//
// The walk starts at cell (uLengthA, uLengthB). The final state is the best
// of M, D + gap-close of the last position of A, and I + gap-close of the
// last position of B (ties prefer M, then D). From there the path is walked
// backwards: for the current state the candidate predecessor scores are
// recomputed and compared against the stored cell value with EQ(); the first
// candidate that matches decides the previous state. The pseudo-state 'S'
// (start) ends the walk. If no candidate matches, or the state is not one of
// M/D/I, the function calls Quit().
//
// Parameters:
//   PA, uLengthA   profile positions of A and their count (asserted > 0)
//   PB, uLengthB   profile positions of B and their count (asserted > 0)
//   DPM_/DPD_/DPI_ DP matrices, read through the DPM()/DPD()/DPI() accessors
//   Path           output; cleared, then one edge is prepended per step
// Returns the score of the chosen final state.
//
// NOTE(review): DPM()/DPD()/DPI() and EQ() are defined outside this block.
// The accessors presumably index using the locals uPrefixCountA /
// uPrefixCountB by name, so those locals must keep their names -- confirm
// before renaming or removing them.
SCORE TraceBack(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
  PWPath &Path)
	{
#if	TRACE
	Log("\n");
	Log("TraceBack LengthA=%u LengthB=%u\n", uLengthA, uLengthB);
#endif
	assert(uLengthB > 0 && uLengthA > 0);

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

	Path.Clear();

	unsigned uPrefixLengthA = uLengthA;
	unsigned uPrefixLengthB = uLengthB;

	const SCORE scoreM = DPM(uPrefixLengthA, uPrefixLengthB);
	SCORE scoreD = DPD(uPrefixLengthA, uPrefixLengthB);
	SCORE scoreI = DPI(uPrefixLengthA, uPrefixLengthB);

	const ProfPos &LastPPA = PA[uLengthA - 1];
	const ProfPos &LastPPB = PB[uLengthB - 1];

// A path ending in a gap state still has to pay for closing that gap.
	scoreD += LastPPA.m_scoreGapClose;
	scoreI += LastPPB.m_scoreGapClose;

// Choose the final state.
	char cEdgeType = cInsane;
	SCORE scoreMax;
	if (scoreM >= scoreD && scoreM >= scoreI)
		{
		scoreMax = scoreM;
		cEdgeType = 'M';
		}
	else if (scoreD >= scoreM && scoreD >= scoreI)
		{
		scoreMax = scoreD;
		cEdgeType = 'D';
		}
	else
		{
		assert(scoreI >= scoreM && scoreI >= scoreD);
		scoreMax = scoreI;
		cEdgeType = 'I';
		}

	for (;;)
		{
	// 'S' is the start pseudo-state: nothing more to prepend.
		if ('S' == cEdgeType)
			break;

		PWEdge Edge;
		Edge.cType = cEdgeType;
		Edge.uPrefixLengthA = uPrefixLengthA;
		Edge.uPrefixLengthB = uPrefixLengthB;
		Path.PrependEdge(Edge);

	// Initialized so that no path through the switch can leave it
	// indeterminate before it is copied into cEdgeType below.
		char cPrevEdgeType = cInsane;
		unsigned uPrevPrefixLengthA = uPrefixLengthA;
		unsigned uPrevPrefixLengthB = uPrefixLengthB;

		switch (cEdgeType)
			{
		case 'M':
			{
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			const ProfPos &PPA = PA[uPrefixLengthA - 1];
			const ProfPos &PPB = PB[uPrefixLengthB - 1];

			const SCORE Score = DPM(uPrefixLengthA, uPrefixLengthB);
			const SCORE scoreMatch = ScoreProfPos2(PPA, PPB);

		// Start->M is only possible in cell (1, 1).
			SCORE scoreSM;
			if (1 == uPrefixLengthA && 1 == uPrefixLengthB)
				scoreSM = scoreMatch;
			else
				scoreSM = MINUS_INFINITY;

			SCORE scoreMM = MINUS_INFINITY;
			SCORE scoreDM = MINUS_INFINITY;
			SCORE scoreIM = MINUS_INFINITY;
			if (uPrefixLengthA > 1 && uPrefixLengthB > 1)
				scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + scoreMatch;
			if (uPrefixLengthA > 1)
				{
			// D->M closes the gap at the previous position of A.
				SCORE scoreTransDM = PA[uPrefixLengthA-2].m_scoreGapClose;
				scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransDM + scoreMatch;
				}
			if (uPrefixLengthB > 1)
				{
			// I->M closes the gap at the previous position of B.
				SCORE scoreTransIM = PB[uPrefixLengthB-2].m_scoreGapClose;
				scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransIM + scoreMatch;
				}

		// First matching candidate wins: M, then D, then I, then S.
			if (EQ(scoreMM, Score))
				cPrevEdgeType = 'M';
			else if (EQ(scoreDM, Score))
				cPrevEdgeType = 'D';
			else if (EQ(scoreIM, Score))
				cPrevEdgeType = 'I';
			else if (EQ(scoreSM, Score))
				cPrevEdgeType = 'S';
			else
				Quit("TraceBack: failed to match M score=%g M=%g D=%g I=%g S=%g",
				  Score, scoreMM, scoreDM, scoreIM, scoreSM);

			--uPrevPrefixLengthA;
			--uPrevPrefixLengthB;
			break;
			}

		case 'D':
			{
			assert(uPrefixLengthA > 0);
			const SCORE Score = DPD(uPrefixLengthA, uPrefixLengthB);

			SCORE scoreMD = MINUS_INFINITY;
			SCORE scoreDD = MINUS_INFINITY;
			SCORE scoreSD = MINUS_INFINITY;
		// Start->D is only considered in column 0 (empty prefix of B).
			if (uPrefixLengthB == 0)
				{
				if (uPrefixLengthA == 1)
					scoreSD = PA[0].m_scoreGapOpen;
				else
					scoreSD = DPD(uPrefixLengthA - 1, 0);
				}
			if (uPrefixLengthA > 1)
				{
				const ProfPos &PPA = PA[uPrefixLengthA - 1];
				SCORE scoreTransMD = PPA.m_scoreGapOpen;
				scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + scoreTransMD;
				scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);
				}

		// First matching candidate wins: M, then D, then S.
			if (EQ(Score, scoreMD))
				cPrevEdgeType = 'M';
			else if (EQ(Score, scoreDD))
				cPrevEdgeType = 'D';
			else if (EQ(Score, scoreSD))
				cPrevEdgeType = 'S';
			else
				Quit("TraceBack: failed to match D");

			--uPrevPrefixLengthA;
			break;
			}

		case 'I':
			{
			assert(uPrefixLengthB > 0);
			const SCORE Score = DPI(uPrefixLengthA, uPrefixLengthB);

			SCORE scoreMI = MINUS_INFINITY;
			SCORE scoreII = MINUS_INFINITY;
			SCORE scoreSI = MINUS_INFINITY;
		// Start->I is only considered in row 0 (empty prefix of A).
			if (uPrefixLengthA == 0)
				{
				if (uPrefixLengthB == 1)
					scoreSI = PB[0].m_scoreGapOpen;
				else
					scoreSI = DPI(0, uPrefixLengthB - 1);
				}
			if (uPrefixLengthB > 1)
				{
				const ProfPos &PPB = PB[uPrefixLengthB - 1];
				SCORE scoreTransMI = PPB.m_scoreGapOpen;
				scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + scoreTransMI;
				scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);
				}

		// First matching candidate wins: M, then I, then S.
			if (EQ(Score, scoreMI))
				cPrevEdgeType = 'M';
			else if (EQ(Score, scoreII))
				cPrevEdgeType = 'I';
			else if (EQ(Score, scoreSI))
				cPrevEdgeType = 'S';
			else
				Quit("TraceBack: failed to match I");

			--uPrevPrefixLengthB;
			break;
			}

		default:
		// An assert alone vanishes under NDEBUG and would let the loop
		// continue with an unset previous state, so fail explicitly.
			Quit("TraceBack: invalid edge type %c", cEdgeType);
			}
#if	TRACE
		Log("Edge %c%c%u.%u", cPrevEdgeType, cEdgeType, uPrefixLengthA, uPrefixLengthB);
		Log("\n");
#endif
		cEdgeType = cPrevEdgeType;
		uPrefixLengthA = uPrevPrefixLengthA;
		uPrefixLengthB = uPrevPrefixLengthB;
		}

	return scoreMax;
	}
Ejemplo n.º 19
0
// SPTest: ad-hoc comparison of the SP and XP objective scores.
//
// Reads two alignments from hard-coded files, requires them to have the same
// column count, merges them twice along two paths produced by
// MakePath(uColCount, INDELS, ...), splits each merged alignment back into
// the two groups of sequences, and logs ObjScoreSP of the merged alignments
// and ObjScoreXP of the split pairs together with their differences.
// All output goes to the log file; nothing is returned.
void SPTest()
	{
	SetPPScore(PPSCORE_SV);

	SetListFileName("c:\\tmp\\muscle.log", false);

	TextFile file1("c:\\tmp\\msa1.afa");
	TextFile file2("c:\\tmp\\msa2.afa");

	MSA msa1;
	MSA msa2;

	msa1.FromFile(file1);
	msa2.FromFile(file2);

	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();

	const unsigned uColCount = msa1.GetColCount();
	if (msa2.GetColCount() != uColCount)
		Quit("Different lengths");

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();
	const unsigned uSeqCount = uSeqCount1 + uSeqCount2;

// The sequence index arrays further down are fixed-size stack buffers;
// refuse inputs that would overflow them.
	const unsigned uMaxSeqs = 1024;
	if (uSeqCount1 > uMaxSeqs || uSeqCount2 > uMaxSeqs)
		Quit("SPTest: too many sequences (%u, %u), max %u per alignment",
		  uSeqCount1, uSeqCount2, uMaxSeqs);

	MSA::SetIdCount(uSeqCount);

// Unit weights; ids are 0..uSeqCount1-1 for msa1 and continue from
// uSeqCount1 for msa2.
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		msa1.SetSeqWeight(uSeqIndex1, 1.0);
		msa1.SetSeqId(uSeqIndex1, uSeqIndex1);
		}

	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
		{
		msa2.SetSeqWeight(uSeqIndex2, 1.0);
		msa2.SetSeqId(uSeqIndex2, uSeqCount1 + uSeqIndex2);
		}

	MSA alnA;
	MSA alnB;

// NOTE(review): MakePath writes into these fixed 1024-byte buffers and its
// output length is not visible from here -- confirm it cannot exceed them
// for large uColCount.
	char strPathA[1024];
	char strPathB[1024];
	MakePath(uColCount, INDELS, strPathA);
	MakePath(uColCount, INDELS, strPathB);

	PWPath PathA;
	PWPath PathB;
	PathA.FromStr(strPathA);
	PathB.FromStr(strPathB);

	Log("PathA=\n");
	PathA.LogMe();
	Log("PathB=\n");
	PathB.LogMe();

	AlignTwoMSAsGivenPath(PathA, msa1, msa2, alnA);
	AlignTwoMSAsGivenPath(PathB, msa1, msa2, alnB);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		alnA.SetSeqWeight(uSeqIndex, 1.0);
		alnB.SetSeqWeight(uSeqIndex, 1.0);
		}

// Row indexes of each group inside the merged alignments: the first
// uSeqCount1 rows, then the following uSeqCount2 rows.
	unsigned Seqs1[uMaxSeqs];
	unsigned Seqs2[uMaxSeqs];

	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		Seqs1[uSeqIndex1] = uSeqIndex1;

	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
		Seqs2[uSeqIndex2] = uSeqCount1 + uSeqIndex2;

	MSA msaA1;
	MSA msaA2;
	MSA msaB1;
	MSA msaB2;
	MSAFromSeqSubset(alnA, Seqs1, uSeqCount1, msaA1);
	MSAFromSeqSubset(alnB, Seqs1, uSeqCount1, msaB1);
	MSAFromSeqSubset(alnA, Seqs2, uSeqCount2, msaA2);
	MSAFromSeqSubset(alnB, Seqs2, uSeqCount2, msaB2);

	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		msaA1.SetSeqWeight(uSeqIndex1, 1.0);
		msaB1.SetSeqWeight(uSeqIndex1, 1.0);
		}

	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
		{
		msaA2.SetSeqWeight(uSeqIndex2, 1.0);
		msaB2.SetSeqWeight(uSeqIndex2, 1.0);
		}

	Log("msaA1=\n");
	msaA1.LogMe();

	Log("msaB1=\n");
	msaB1.LogMe();

	Log("msaA2=\n");
	msaA2.LogMe();

	Log("msaB2=\n");
	msaB2.LogMe();

	Log("alnA=\n");
	alnA.LogMe();

	Log("AlnB=\n");
	alnB.LogMe();

// Sum-of-pairs score of each merged alignment.
	Log("\nSPA\n---\n");
	SCORE SPA = ObjScoreSP(alnA);
	Log("\nSPB\n---\n");
	SCORE SPB = ObjScoreSP(alnB);

// Cross score between the two groups of each merged alignment.
	Log("\nXPA\n---\n");
	SCORE XPA = ObjScoreXP(msaA1, msaA2);
	Log("\nXPB\n---\n");
	SCORE XPB = ObjScoreXP(msaB1, msaB2);

	Log("SPA=%.4g SPB=%.4g Diff=%.4g\n", SPA, SPB, SPA - SPB);
	Log("XPA=%.4g XPB=%.4g Diff=%.4g\n", XPA, XPB, XPA - XPB);
	}