Beispiel #1
0
SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
  bool bLockLeft, bool bLockRight)
	{
	const unsigned uLengthA = msa1.GetColCount();
	const unsigned uLengthB = msa2.GetColCount();

	ProfPos *PA = ProfileFromMSA(msa1);
	ProfPos *PB = ProfileFromMSA(msa2);

	if (bLockLeft)
		{
		PA[0].m_scoreGapOpen = MINUS_INFINITY;
		PB[0].m_scoreGapOpen = MINUS_INFINITY;
		}

	if (bLockRight)
		{
		PA[uLengthA-1].m_scoreGapClose = MINUS_INFINITY;
		PB[uLengthB-1].m_scoreGapClose = MINUS_INFINITY;
		}

	float r = (float) uLengthA/ (float) (uLengthB + 1); // +1 to prevent div 0
	if (r < 1)
		r = 1/r;

	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);

	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);

	delete[] PA;
	delete[] PB;

	return Score;
	}
Beispiel #2
0
// Objective score defined as the dynamic programming score.
// Input is two alignments, which must be of the same length.
// Result is the same profile-profile score that is optimized
// by dynamic programming.
SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
	{
	const unsigned uColCount = msa1.GetColCount();
	if (msa2.GetColCount() != uColCount)
		Quit("ObjScoreDP, must be same length");

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();

	const ProfPos *PA = ProfileFromMSA(msa1);
	const ProfPos *PB = ProfileFromMSA(msa2);

	return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore);
	}
Beispiel #3
0
// Objective score defined as the sum of profile-sequence
// scores for each sequence in the alignment. The profile
// is computed from the entire alignment, so this includes
// the score of each sequence against itself. This is to
// avoid recomputing the profile each time, so we reduce
// complexity but introduce a questionable approximation.
// The goal is to see if we can exploit the apparent
// improvement in performance of log-expectation score
// over the usual sum-of-pairs by optimizing this
// objective score in the iterative refinement stage.
SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
	{
	if (g_PPScore != PPSCORE_LE)
		Quit("FastScoreMSA_LASimple: LA");

	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

	const ProfPos *Prof = ProfileFromMSA(msa);

	if (0 != MatchScore)
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;

	SCORE scoreTotal = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);
		SCORE scoreSeq = 0;
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const ProfPos &PP = Prof[uColIndex];
			if (msa.IsGap(uSeqIndex, uColIndex))
				{
				bool bOpen = (0 == uColIndex ||
				  !msa.IsGap(uSeqIndex, uColIndex - 1));
				bool bClose = (uColCount - 1 == uColIndex ||
				  !msa.IsGap(uSeqIndex, uColIndex + 1));

				if (bOpen)
					scoreSeq += PP.m_scoreGapOpen;
				if (bClose)
					scoreSeq += PP.m_scoreGapClose;
				//if (!bOpen && !bClose)
				//	scoreSeq += PP.m_scoreGapExtend;
				}
			else if (msa.IsWildcard(uSeqIndex, uColIndex))
				continue;
			else
				{
				unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);
				const SCORE scoreMatch = PP.m_AAScores[uLetter];
				if (0 != MatchScore)
					MatchScore[uColIndex] += weightSeq*scoreMatch;
				scoreSeq += scoreMatch;
				}
			}
		scoreTotal += weightSeq*scoreSeq;
		}

	delete[] Prof;
	return scoreTotal;
	}
Beispiel #4
0
static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		msa.SetSeqId(uSeqIndex, uSeqIndex);
	if (TreeNeededForWeighting(g_SeqWeight2.get()))
		{
		TreeFromMSA(msa, tree, g_Cluster2.get(), g_Distance2.get(), g_Root1.get());
		SetMuscleTree(tree);
		}
	return ProfileFromMSA(msa);
	}
Beispiel #5
0
ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a)
	{
	assert(GuideTree.IsRooted());

#if	TRACE
	Log("GuideTree:\n");
	GuideTree.LogMe();
#endif

	const unsigned uSeqCount = v.Length();
	const unsigned uNodeCount = 2*uSeqCount - 1;
	const unsigned uIterCount = uSeqCount - 1;

	WEIGHT *Weights = new WEIGHT[uSeqCount];
	CalcClustalWWeights(GuideTree, Weights);

	ProgNode *ProgNodes = new ProgNode[uNodeCount];

	unsigned uJoin = 0;
	unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
	SetProgressDesc("Align node");
	do
		{
		if (GuideTree.IsLeaf(uTreeNodeIndex))
			{
			if (uTreeNodeIndex >= uNodeCount)
				Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount);
			ProgNode &Node = ProgNodes[uTreeNodeIndex];
			unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
			if (uId >= uSeqCount)
				Quit("Seq index out of range");
			const Seq &s = *(v[uId]);
			Node.m_MSA.FromSeq(s);
			Node.m_MSA.SetSeqId(0, uId);
			Node.m_uLength = Node.m_MSA.GetColCount();
			Node.m_Weight = Weights[uId];
		// TODO: Term gaps settable
			Node.m_Prof = ProfileFromMSA(Node.m_MSA);
			Node.m_EstringL = 0;
			Node.m_EstringR = 0;
#if	TRACE
			Log("Leaf id=%u\n", uId);
			Log("MSA=\n");
			Node.m_MSA.LogMe();
			Log("Profile (from MSA)=\n");
			ListProfile(Node.m_Prof, Node.m_uLength, &Node.m_MSA);
#endif
			}
		else
			{
			Progress(uJoin, uSeqCount - 1);
			++uJoin;

			const unsigned uMergeNodeIndex = uTreeNodeIndex;
			ProgNode &Parent = ProgNodes[uMergeNodeIndex];

			const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
			const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);

			if (g_bVerbose)
				{
				Log("Align: (");
				LogLeafNames(GuideTree, uLeft);
				Log(") (");
				LogLeafNames(GuideTree, uRight);
				Log(")\n");
				}

			ProgNode &Node1 = ProgNodes[uLeft];
			ProgNode &Node2 = ProgNodes[uRight];

#if	TRACE
			Log("AlignTwoMSAs:\n");
#endif
			AlignTwoProfs(
			  Node1.m_Prof, Node1.m_uLength, Node1.m_Weight,
			  Node2.m_Prof, Node2.m_uLength, Node2.m_Weight,
			  Parent.m_Path,
			  &Parent.m_Prof, &Parent.m_uLength);
#if	TRACE_LENGTH_DELTA
			{
			unsigned L = Node1.m_uLength;
			unsigned R = Node2.m_uLength;
			unsigned P = Parent.m_Path.GetEdgeCount();
			unsigned Max = L > R ? L : R;
			unsigned d = P - Max;
			Log("LD%u;%u;%u;%u\n", L, R, P, d);
			}
#endif
			PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR);

			Parent.m_Weight = Node1.m_Weight + Node2.m_Weight;

#if	VALIDATE
			{
#if	TRACE
			Log("AlignTwoMSAs:\n");
#endif
			PWPath TmpPath;
			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, TmpPath);
			ProfPos *P1 = ProfileFromMSA(Node1.m_MSA, true);
			ProfPos *P2 = ProfileFromMSA(Node2.m_MSA, true);
			unsigned uLength = Parent.m_MSA.GetColCount();
			ProfPos *TmpProf = ProfileFromMSA(Parent.m_MSA, true);

#if	TRACE
			Log("Node1 MSA=\n");
			Node1.m_MSA.LogMe();

			Log("Node1 prof=\n");
			ListProfile(Node1.m_Prof, Node1.m_MSA.GetColCount(), &Node1.m_MSA);
			Log("Node1 prof (from MSA)=\n");
			ListProfile(P1, Node1.m_MSA.GetColCount(), &Node1.m_MSA);

			AssertProfsEq(Node1.m_Prof, Node1.m_uLength, P1, Node1.m_MSA.GetColCount());

			Log("Node2 prof=\n");
			ListProfile(Node2.m_Prof, Node2.m_MSA.GetColCount(), &Node2.m_MSA);

			Log("Node2 MSA=\n");
			Node2.m_MSA.LogMe();

			Log("Node2 prof (from MSA)=\n");
			ListProfile(P2, Node2.m_MSA.GetColCount(), &Node2.m_MSA);

			AssertProfsEq(Node2.m_Prof, Node2.m_uLength, P2, Node2.m_MSA.GetColCount());

			TmpPath.AssertEqual(Parent.m_Path);

			Log("Parent MSA=\n");
			Parent.m_MSA.LogMe();

			Log("Parent prof=\n");
			ListProfile(Parent.m_Prof, Parent.m_uLength, &Parent.m_MSA);

			Log("Parent prof (from MSA)=\n");
			ListProfile(TmpProf, Parent.m_MSA.GetColCount(), &Parent.m_MSA);

#endif	// TRACE
			AssertProfsEq(Parent.m_Prof, Parent.m_uLength,
			  TmpProf, Parent.m_MSA.GetColCount());
			delete[] P1;
			delete[] P2;
			delete[] TmpProf;
			}
#endif	// VALIDATE

			Node1.m_MSA.Clear();
			Node2.m_MSA.Clear();

		// Don't delete profiles, may need them for tree refinement.
			//delete[] Node1.m_Prof;
			//delete[] Node2.m_Prof;
			//Node1.m_Prof = 0;
			//Node2.m_Prof = 0;
			}
		uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);
	ProgressStepsDone();

	if (g_bBrenner)
		MakeRootMSABrenner((SeqVect &) v, GuideTree, ProgNodes, a);
	else
		MakeRootMSA(v, GuideTree, ProgNodes, a);

#if	VALIDATE
	{
	unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
	AssertMSAEq(a, RootProgNode.m_MSA);
	}
#endif

	delete[] Weights;
	return ProgNodes;
	}