Example #1
0
void SubFam(const Tree &tree, unsigned uMaxLeafCount, unsigned SubFams[], unsigned *ptruSubFamCount)
	{
	*ptruSubFamCount = 0;
	SubFamRecurse(tree, tree.GetRootNodeIndex(), uMaxLeafCount, SubFams, *ptruSubFamCount);

#if	TRACE
	{
	Log("\n");
	Log("Tree:\n");
	tree.LogMe();
	//void DrawTree(const Tree &tree);
	//DrawTree(tree);
	Log("\n");
	Log("%d subfams:\n", *ptruSubFamCount);
	for (unsigned i = 0; i < *ptruSubFamCount; ++i)
		Log("  %d=%d", i, SubFams[i]);
	Log("\n");
	}
#endif
	}
Example #2
0
void TestBiPart()
	{
	SetListFileName("c:\\tmp\\lobster.log", false);
	Tree tree;
	TextFile fileIn("c:\\tmp\\test.phy");
	tree.FromFile(fileIn);
	tree.LogMe();

	const unsigned uNodeCount = tree.GetNodeCount();
	unsigned *Leaves1 = new unsigned[uNodeCount];
	unsigned *Leaves2 = new unsigned[uNodeCount];

	PhyEnumEdgeState ES;
	bool bDone = false;
	for (;;)
		{
		unsigned uCount1 = uInsane;
		unsigned uCount2 = uInsane;
		bool bOk = PhyEnumBiParts(tree, ES, Leaves1, &uCount1, Leaves2, &uCount2);
		Log("PEBP=%d ES.Init=%d ES.ni1=%d ES.ni2=%d\n",
		  bOk,
		  ES.m_bInit,
		  ES.m_uNodeIndex1,
		  ES.m_uNodeIndex2);
		if (!bOk)
			break;
		Log("\n");
		Log("Part1: ");
		for (unsigned n = 0; n < uCount1; ++n)
			Log(" %d(%s)", Leaves1[n], tree.GetLeafName(Leaves1[n]));
		Log("\n");
		Log("Part2: ");
		for (unsigned n = 0; n < uCount2; ++n)
			Log(" %d(%s)", Leaves2[n], tree.GetLeafName(Leaves2[n]));
		Log("\n");
		}
	}
Example #3
0
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
	{
#if	TRACE
	Log("CalcClustalWWeights\n");
	tree.LogMe();
#endif

	const unsigned uLeafCount = tree.GetLeafCount();
	if (0 == uLeafCount)
		return;
	else if (1 == uLeafCount)
		{
		Weights[0] = (WEIGHT) 1.0;
		return;
		}
	else if (2 == uLeafCount)
		{
		Weights[0] = (WEIGHT) 0.5;
		Weights[1] = (WEIGHT) 0.5;
		return;
		}

	if (!tree.IsRooted())
		Quit("CalcClustalWWeights requires rooted tree");

	const unsigned uNodeCount = tree.GetNodeCount();
	unsigned *LeavesUnderNode = new unsigned[uNodeCount];
	memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));

	const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
	unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
	if (uLeavesUnderRoot != uLeafCount)
		Quit("WeightsFromTreee: Internal error, root count %u %u",
		  uLeavesUnderRoot, uLeafCount);

#if	TRACE
	Log("Node  Leaves    Length  Strength\n");
	Log("----  ------  --------  --------\n");
	//    1234  123456  12345678  12345678
#endif

	double *Strengths = new double[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (tree.IsRoot(uNodeIndex))
			{
			Strengths[uNodeIndex] = 0.0;
			continue;
			}
		const unsigned uParent = tree.GetParent(uNodeIndex);
		const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
		const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
		const double dStrength = dLength / (double) uLeaves;
		Strengths[uNodeIndex] = dStrength;
#if	TRACE
		Log("%4u  %6u  %8g  %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
#endif
		}

#if	TRACE
	Log("\n");
	Log("                 Seq  Path..Weight\n");
	Log("--------------------  ------------\n");
#endif
	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
#if	TRACE
		Log("%20.20s  %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
#endif
		if (!tree.IsLeaf(uLeafNodeIndex))
			Quit("CalcClustalWWeights: leaf");

		double dWeight = 0;
		unsigned uNode = uLeafNodeIndex;
		while (!tree.IsRoot(uNode))
			{
			dWeight += Strengths[uNode];
			uNode = tree.GetParent(uNode);
#if	TRACE
			Log("->%u(%g)", uNode, Strengths[uNode]);
#endif
			}
		if (dWeight < 0.0001)
			{
#if	TRACE
			Log("zero->one");
#endif
			dWeight = 1.0;
			}
		Weights[n] = (WEIGHT) dWeight;
#if	TRACE
		Log(" = %g\n", dWeight);
#endif
		}

	delete[] Strengths;
	delete[] LeavesUnderNode;

	Normalize(Weights, uLeafCount);
	}
Example #4
0
void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
  MSA &a)
	{
#if	TRACE
	Log("MakeRootMSA Tree=");
	GuideTree.LogMe();
#endif
	const unsigned uSeqCount = v.GetSeqCount();
	unsigned uColCount = uInsane;
	unsigned uSeqIndex = 0;
	const unsigned uTreeNodeCount = GuideTree.GetNodeCount();
	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
	const unsigned uRootColCount = RootPath.GetEdgeCount();
	const unsigned uEstringSize = uRootColCount + 1;
	short *Estring1 = new short[uEstringSize];
	short *Estring2 = new short[uEstringSize];
	SetProgressDesc("Root alignment");

	unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
	do
		{
		Progress(uSeqIndex, uSeqCount);

		unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
		const Seq &s = *(v[uId]);

		Seq sRootE;
		short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
		  Estring1, Estring2);
		Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);

#if	VALIDATE
		Seq sRoot;
		MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
		if (!sRoot.Eq(sRootE))
			{
			Log("sRoot=");
			sRoot.LogMe();
			Log("sRootE=");
			sRootE.LogMe();
			Quit("Root seqs differ");
			}
#if	TRACE
		Log("MakeRootSeq=\n");
		sRoot.LogMe();
#endif
#endif

		if (uInsane == uColCount)
			{
			uColCount = sRootE.Length();
			a.SetSize(uSeqCount, uColCount);
			}
		else
			{
			assert(uColCount == sRootE.Length());
			}
		a.SetSeqName(uSeqIndex, s.GetName());
		a.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
		++uSeqIndex;

		uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);

	delete[] Estring1;
	delete[] Estring2;

	ProgressStepsDone();
	assert(uSeqIndex == uSeqCount);
	}
Example #5
0
void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage)
	{
	g_uLeafCount.get() = DC.GetCount();

	g_uTriangleSize.get() = (g_uLeafCount.get()*(g_uLeafCount.get() - 1))/2;
	g_uInternalNodeCount.get() = g_uLeafCount.get() - 1;

	g_Dist.get() = new dist_t[g_uTriangleSize.get()];

	g_uNodeIndex.get() = new unsigned[g_uLeafCount.get()];
	g_uNearestNeighbor.get() = new unsigned[g_uLeafCount.get()];
	g_MinDist.get() = new dist_t[g_uLeafCount.get()];
	unsigned *Ids = new unsigned [g_uLeafCount.get()];
	char **Names = new char *[g_uLeafCount.get()];

	g_uLeft.get() = new unsigned[g_uInternalNodeCount.get()];
	g_uRight.get() = new unsigned[g_uInternalNodeCount.get()];
	g_Height.get() = new dist_t[g_uInternalNodeCount.get()];
	g_LeftLength.get() = new dist_t[g_uInternalNodeCount.get()];
	g_RightLength.get() = new dist_t[g_uInternalNodeCount.get()];

	for (unsigned i = 0; i < g_uLeafCount.get(); ++i)
		{
		g_MinDist.get()[i] = BIG_DIST;
		g_uNodeIndex.get()[i] = i;
		g_uNearestNeighbor.get()[i] = uInsane;
		Ids[i] = DC.GetId(i);
		Names[i] = strsave(DC.GetName(i));
		}

	for (unsigned i = 0; i < g_uInternalNodeCount.get(); ++i)
		{
		g_uLeft.get()[i] = uInsane;
		g_uRight.get()[i] = uInsane;
		g_LeftLength.get()[i] = BIG_DIST;
		g_RightLength.get()[i] = BIG_DIST;
		g_Height.get()[i] = BIG_DIST;
		}

// Compute initial NxN triangular distance matrix.
// Store minimum distance for each full (not triangular) row.
// Loop from 1, not 0, because "row" is 0, 1 ... i-1,
// so nothing to do when i=0.
	for (unsigned i = 1; i < g_uLeafCount.get(); ++i)
		{
		dist_t *Row = g_Dist.get() + TriangleSubscript(i, 0);
		DC.CalcDistRange(i, Row);
		for (unsigned j = 0; j < i; ++j)
			{
			const dist_t d = Row[j];
			if (d < g_MinDist.get()[i])
				{
				g_MinDist.get()[i] = d;
				g_uNearestNeighbor.get()[i] = j;
				}
			if (d < g_MinDist.get()[j])
				{
				g_MinDist.get()[j] = d;
				g_uNearestNeighbor.get()[j] = i;
				}
			}
		}

#if	TRACE
	Log("Initial state:\n");
	ListState();
#endif

	for (g_uInternalNodeIndex.get() = 0; g_uInternalNodeIndex.get() < g_uLeafCount.get() - 1;
	  ++g_uInternalNodeIndex.get())
		{
#if	TRACE
		Log("\n");
		Log("Internal node index %5u\n", g_uInternalNodeIndex.get());
		Log("-------------------------\n");
#endif

	// Find nearest neighbors
		unsigned Lmin = uInsane;
		unsigned Rmin = uInsane;
		dist_t dtMinDist = BIG_DIST;
		for (unsigned j = 0; j < g_uLeafCount.get(); ++j)
			{
			if (uInsane == g_uNodeIndex.get()[j])
				continue;

			dist_t d = g_MinDist.get()[j];
			if (d < dtMinDist)
				{
				dtMinDist = d;
				Lmin = j;
				Rmin = g_uNearestNeighbor.get()[j];
				assert(uInsane != Rmin);
				assert(uInsane != g_uNodeIndex.get()[Rmin]);
				}
			}

		assert(Lmin != uInsane);
		assert(Rmin != uInsane);
		assert(dtMinDist != BIG_DIST);

#if	TRACE
		Log("Nearest neighbors Lmin %u[=%u] Rmin %u[=%u] dist %.3g\n",
		  Lmin,
		  g_uNodeIndex.get()[Lmin],
		  Rmin,
		  g_uNodeIndex.get()[Rmin],
		  dtMinDist);
#endif

	// Compute distances to new node
	// New node overwrites row currently assigned to Lmin
		dist_t dtNewMinDist = BIG_DIST;
		unsigned uNewNearestNeighbor = uInsane;
		for (unsigned j = 0; j < g_uLeafCount.get(); ++j)
			{
			if (j == Lmin || j == Rmin)
				continue;
			if (uInsane == g_uNodeIndex.get()[j])
				continue;

			const unsigned vL = TriangleSubscript(Lmin, j);
			const unsigned vR = TriangleSubscript(Rmin, j);
			const dist_t dL = g_Dist.get()[vL];
			const dist_t dR = g_Dist.get()[vR];
			dist_t dtNewDist;

			switch (Linkage)
				{
			case LINKAGE_Avg:
				dtNewDist = AVG(dL, dR);
				break;

			case LINKAGE_Min:
				dtNewDist = MIN(dL, dR);
				break;

			case LINKAGE_Max:
				dtNewDist = MAX(dL, dR);
				break;

			case LINKAGE_Biased:
				dtNewDist = g_dSUEFF.get()*AVG(dL, dR) + (1 - g_dSUEFF.get())*MIN(dL, dR);
				break;

			default:
				Quit("UPGMA2: Invalid LINKAGE_%u", Linkage);
				}

		// Nasty special case.
		// If nearest neighbor of j is Lmin or Rmin, then make the new
		// node (which overwrites the row currently occupied by Lmin)
		// the nearest neighbor. This situation can occur when there are
		// equal distances in the matrix. If we don't make this fix,
		// the nearest neighbor pointer for j would become invalid.
		// (We don't need to test for == Lmin, because in that case
		// the net change needed is zero due to the change in row
		// numbering).
			if (g_uNearestNeighbor.get()[j] == Rmin)
				g_uNearestNeighbor.get()[j] = Lmin;

#if	TRACE
			Log("New dist to %u = (%u/%.3g + %u/%.3g)/2 = %.3g\n",
			  j, Lmin, dL, Rmin, dR, dtNewDist);
#endif
			g_Dist.get()[vL] = dtNewDist;
			if (dtNewDist < dtNewMinDist)
				{
				dtNewMinDist = dtNewDist;
				uNewNearestNeighbor = j;
				}
			}

		assert(g_uInternalNodeIndex.get() < g_uLeafCount.get() - 1 || BIG_DIST != dtNewMinDist);
		assert(g_uInternalNodeIndex.get() < g_uLeafCount.get() - 1 || uInsane != uNewNearestNeighbor);

		const unsigned v = TriangleSubscript(Lmin, Rmin);
		const dist_t dLR = g_Dist.get()[v];
		const dist_t dHeightNew = dLR/2;
		const unsigned uLeft = g_uNodeIndex.get()[Lmin];
		const unsigned uRight = g_uNodeIndex.get()[Rmin];
		const dist_t HeightLeft =
		  uLeft < g_uLeafCount.get() ? 0 : g_Height.get()[uLeft - g_uLeafCount.get()];
		const dist_t HeightRight =
		  uRight < g_uLeafCount.get() ? 0 : g_Height.get()[uRight - g_uLeafCount.get()];

		g_uLeft.get()[g_uInternalNodeIndex.get()] = uLeft;
		g_uRight.get()[g_uInternalNodeIndex.get()] = uRight;
		g_LeftLength.get()[g_uInternalNodeIndex.get()] = dHeightNew - HeightLeft;
		g_RightLength.get()[g_uInternalNodeIndex.get()] = dHeightNew - HeightRight;
		g_Height.get()[g_uInternalNodeIndex.get()] = dHeightNew;

	// Row for left child overwritten by row for new node
		g_uNodeIndex.get()[Lmin] = g_uLeafCount.get() + g_uInternalNodeIndex.get();
		g_uNearestNeighbor.get()[Lmin] = uNewNearestNeighbor;
		g_MinDist.get()[Lmin] = dtNewMinDist;

	// Delete row for right child
		g_uNodeIndex.get()[Rmin] = uInsane;

#if	TRACE
		Log("\nInternalNodeIndex=%u Lmin=%u Rmin=%u\n",
		  g_uInternalNodeIndex.get(), Lmin, Rmin);
		ListState();
#endif
		}

	unsigned uRoot = g_uLeafCount.get() - 2;
	tree.Create(g_uLeafCount.get(), uRoot, g_uLeft.get(), g_uRight.get(), g_LeftLength.get(), g_RightLength.get(),
	  Ids, Names);

#if	TRACE
	tree.LogMe();
#endif

	delete[] g_Dist.get();

	delete[] g_uNodeIndex.get();
	delete[] g_uNearestNeighbor.get();
	delete[] g_MinDist.get();
	delete[] g_Height.get();

	delete[] g_uLeft.get();
	delete[] g_uRight.get();
	delete[] g_LeftLength.get();
	delete[] g_RightLength.get();
	
	for (unsigned i = 0; i < g_uLeafCount.get(); ++i)
		free(Names[i]);
	delete[] Names;
	delete[] Ids;
	}
Example #6
0
void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
  double *ptrdLength1, double *ptrdLength2,
  ROOT RootMethod)
	{
#if	TRACE
	tree.LogMe();
#endif
	if (tree.IsRooted())
		Quit("FindRoot: tree already rooted");

	const unsigned uNodeCount = tree.GetNodeCount();
	const unsigned uLeafCount = tree.GetLeafCount();

	if (uNodeCount < 2)
		Quit("Root: don't support trees with < 2 edges");

	EdgeInfo **EIs = new EdgeInfo *[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		EIs[uNodeIndex] = new EdgeInfo[3];

	EdgeList Edges;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		if (tree.IsLeaf(uNodeIndex))
			{
			unsigned uParent = tree.GetNeighbor1(uNodeIndex);
			Edges.Add(uParent, uNodeIndex);
			}

#if	TRACE
	Log("Edges: ");
	Edges.LogMe();
#endif

// Main loop: iterate until all distances known
	double dAllMaxDist = -1e20;
	unsigned uMaxFrom = NULL_NEIGHBOR;
	unsigned uMaxTo = NULL_NEIGHBOR;
	for (;;)
		{
		EdgeList NextEdges;

#if	TRACE
		Log("\nTop of main loop\n");
		Log("Edges: ");
		Edges.LogMe();
		Log("MDs:\n");
		ListEIs(EIs, uNodeCount);
#endif

	// For all edges
		const unsigned uEdgeCount = Edges.GetCount();
		if (0 == uEdgeCount)
			break;
		for (unsigned n = 0; n < uEdgeCount; ++n)
			{
			unsigned uNodeFrom;
			unsigned uNodeTo;
			Edges.GetEdge(n, &uNodeFrom, &uNodeTo);

			CalcInfo(tree, uNodeFrom, uNodeTo, EIs);
#if	TRACE
			Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);
#endif
			const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);
			for (unsigned i = 0; i < uNeighborCount; ++i)
				{
				const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);
				if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&
				  AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))
					NextEdges.Add(uNeighborIndex, uNodeFrom);
				}
			}
		Edges.Copy(NextEdges);
		}

#if	TRACE
	ListEIs(EIs, uNodeCount);
#endif

	switch (RootMethod)
		{
	case ROOT_MidLongestSpan:
		RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,
		  ptrdLength1, ptrdLength2);
		break;

	case ROOT_MinAvgLeafDist:
		RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,
		  ptrdLength1, ptrdLength2);
		break;

	default:
		Quit("Invalid RootMethod=%d", RootMethod);
		}

	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		delete[] EIs[uNodeIndex];
	delete[] EIs;
	}
Example #7
0
void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
  unsigned NewNodeIndexToOldNodeIndex[])
	{
#if	TRACE
	Log("DiffTreesE NewTree:\n");
	NewTree.LogMe();
	Log("\n");
	Log("OldTree:\n");
	OldTree.LogMe();
#endif

	if (!NewTree.IsRooted() || !OldTree.IsRooted())
		Quit("DiffTrees: requires rooted trees");

	const unsigned uNodeCount = NewTree.GetNodeCount();
	const unsigned uOldNodeCount = OldTree.GetNodeCount();
	const unsigned uLeafCount = NewTree.GetLeafCount();
	const unsigned uOldLeafCount = OldTree.GetLeafCount();
	if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
		Quit("DiffTreesE: different node counts");

	{
	unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];
	for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
		{
		if (OldTree.IsLeaf(uOldNodeIndex))
			{
			unsigned Id = OldTree.GetLeafId(uOldNodeIndex);
			IdToOldNodeIndex[Id] = uOldNodeIndex;
			}
		}

// Initialize NewNodeIndexToOldNodeIndex[]
// All internal nodes are marked as changed, but may be updated later.
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		if (NewTree.IsLeaf(uNewNodeIndex))
			{
			unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
			assert(uId < uLeafCount);

			unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
			assert(uOldNodeIndex < uNodeCount);

			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
			}
		else
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
		}
	delete[] IdToOldNodeIndex;
	}

// Depth-first traversal of tree.
// The order guarantees that a node is visited before
// its parent is visited.
	for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNewNodeIndex;
	  uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
		{
		if (NewTree.IsLeaf(uNewNodeIndex))
			continue;

	// If either child is changed, flag this node as changed and continue.
		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
		unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
		if (NODE_CHANGED == uOldLeft)
			{
			NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED;
			continue;
			}

		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
		unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
		if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight])
			{
			NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED;
			continue;
			}

		unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
		unsigned uOldParentRight = OldTree.GetParent(uOldRight);
		if (uOldParentLeft == uOldParentRight)
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
		else
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
		}

#if TRACE
	{
	Log("NewToOld ");
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		Log(" [%3u]=", uNewNodeIndex);
		if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
			Log("  X");
		else
			Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
		if ((uNewNodeIndex+1)%8 == 0)
			Log("\n         ");
		}
	Log("\n");
	}
#endif

#if	DEBUG
	{
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
		if (NewTree.IsLeaf(uNewNodeIndex))
			{
			if (uOld >= uNodeCount)
				{
				Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
				  uNewNodeIndex, uOld, uNodeCount);
				Quit("Diff check failed");
				}
			unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
			unsigned uIdOld = OldTree.GetLeafId(uOld);
			if (uIdNew != uIdOld)
				{
				Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
				  uNewNodeIndex, uOld, uIdNew, uIdOld);
				Quit("Diff check failed");
				}
			continue;
			}

		if (NODE_CHANGED == uOld)
			continue;

		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);

		unsigned uOldLeft = OldTree.GetLeft(uOld);
		unsigned uOldRight = OldTree.GetRight(uOld);

		unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
		unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];

		bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
		bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
		if (!bSameNotRotated && !bSameRotated)
			{
			Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
			Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
			Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
			Quit("Diff check failed");
			}
		}
	}
#endif
	}
Example #8
0
void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a)
	{
	assert(GuideTree.IsRooted());

#if	TRACE
	Log("GuideTree:\n");
	GuideTree.LogMe();
#endif

	const unsigned uSeqCount = v.Length();
	const unsigned uNodeCount = 2*uSeqCount - 1;

	ProgNode *ProgNodes = new ProgNode[uNodeCount];

	unsigned uJoin = 0;
	unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
	SetProgressDesc("Align node");
	do
		{
		if (GuideTree.IsLeaf(uTreeNodeIndex))
			{
			if (uTreeNodeIndex >= uNodeCount)
				Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount);
			ProgNode &Node = ProgNodes[uTreeNodeIndex];
			unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
			if (uId >= uSeqCount)
				Quit("Seq index out of range");
			const Seq &s = *(v[uId]);
			Node.m_MSA.FromSeq(s);
			Node.m_MSA.SetSeqId(0, uId);
			Node.m_uLength = Node.m_MSA.GetColCount();
			}
		else
			{
			Progress(uJoin, uSeqCount - 1);
			++uJoin;

			const unsigned uMergeNodeIndex = uTreeNodeIndex;
			ProgNode &Parent = ProgNodes[uMergeNodeIndex];

			const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
			const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);

			ProgNode &Node1 = ProgNodes[uLeft];
			ProgNode &Node2 = ProgNodes[uRight];

			PWPath Path;
			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
			Parent.m_uLength = Parent.m_MSA.GetColCount();

			Node1.m_MSA.Clear();
			Node2.m_MSA.Clear();
			}
		uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);
	ProgressStepsDone();

	unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
	a.Copy(RootProgNode.m_MSA);

	delete[] ProgNodes;
	ProgNodes = 0;
	}
Example #9
0
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
  unsigned IdToDiffsLeafNodeIndex[])
	{
#if	TRACE
	Log("Tree1:\n");
	Tree1.LogMe();
	Log("\n");
	Log("Tree2:\n");
	Tree2.LogMe();
#endif

	if (!Tree1.IsRooted() || !Tree2.IsRooted())
		Quit("DiffTrees: requires rooted trees");

	const unsigned uNodeCount = Tree1.GetNodeCount();
	const unsigned uNodeCount2 = Tree2.GetNodeCount();
	
	const unsigned uLeafCount = Tree1.GetLeafCount();
	const unsigned uLeafCount2 = Tree2.GetLeafCount();
	assert(uLeafCount == uLeafCount2);

	if (uNodeCount != uNodeCount2)
		Quit("DiffTrees: different node counts");

// Allocate tables so we can convert tree node index to
// and from the unique id with a O(1) lookup.
	unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
	unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];

	bool *bIsBachelor1 = new bool[uNodeCount];
	bool *bIsDiff1 = new bool[uNodeCount];

	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		NodeIndexToId1[uNodeIndex] = uNodeCount;
		bIsBachelor1[uNodeIndex] = false;
		bIsDiff1[uNodeIndex] = false;

	// Use uNodeCount as value meaning "not set".
		IdToNodeIndex2[uNodeIndex] = uNodeCount;
		}

// Initialize node index <-> id lookup tables
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Tree1.IsLeaf(uNodeIndex))
			{
			const unsigned uId = Tree1.GetLeafId(uNodeIndex);
			if (uId >= uNodeCount)
				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
			NodeIndexToId1[uNodeIndex] = uId;
			}

		if (Tree2.IsLeaf(uNodeIndex))
			{
			const unsigned uId = Tree2.GetLeafId(uNodeIndex);
			if (uId >= uNodeCount)
				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
			IdToNodeIndex2[uId] = uNodeIndex;
			}
		}

// Validity check. This verifies that the ids
// pre-assigned to the leaves in Tree1 are unique
// (note that the id<N check above does not rule
// out two leaves having duplicate ids).
	for (unsigned uId = 0; uId < uLeafCount; ++uId)
		{
		unsigned uNodeIndex2 = IdToNodeIndex2[uId];
		if (uNodeCount == uNodeIndex2)
			Quit("DiffTrees, check 2");
		}

// Ids assigned to internal nodes are N, N+1 ...
// An internal node id uniquely identifies a set
// of two or more leaves.
	unsigned uInternalNodeId = uLeafCount;

// Depth-first traversal of tree.
// The order guarantees that a node is visited before
// its parent is visited.
	for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex1;
	  uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
		{
#if	TRACE
		Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
		  uNodeIndex1,
		  Tree1.IsLeaf(uNodeIndex1),
		  bIsBachelor1[uNodeIndex1]);
#endif

	// Leaves are trivial; nothing to do.
		if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
			continue;

	// If either child is a bachelor, flag
	// this node as a bachelor and continue.
		unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
		if (bIsBachelor1[uLeft1])
			{
			bIsBachelor1[uNodeIndex1] = true;
			continue;
			}

		unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
		if (bIsBachelor1[uRight1])
			{
			bIsBachelor1[uNodeIndex1] = true;
			continue;
			}

	// Both children are married.
	// Married nodes are guaranteed to have an id.
		unsigned uIdLeft = NodeIndexToId1[uLeft1];
		unsigned uIdRight = NodeIndexToId1[uRight1];

		if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
			Quit("DiffTrees, check 5");

	// uLeft2 is the spouse of uLeft1, and similarly for uRight2.
		unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
		unsigned uRight2 = IdToNodeIndex2[uIdRight];

		if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
			Quit("DiffTrees, check 6");

	// If the spouses of uLeft1 and uRight1 have the same
	// parent, then this parent is the spouse of uNodeIndex1.
	// Otherwise, uNodeIndex1 is a diff.
		unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
		unsigned uParentRight2 = Tree2.GetParent(uRight2);

#if	TRACE
		Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
		  uLeft1,
		  uRight1,
		  uLeft2,
		  uRight2,
		  uParentLeft2,
		  uParentRight2);
#endif

		if (uParentLeft2 == uParentRight2)
			{
			NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
			IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
			++uInternalNodeId;
			}
		else
			bIsBachelor1[uNodeIndex1] = true;
		}

	unsigned uDiffCount = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (bIsBachelor1[uNodeIndex])
			continue;
		if (Tree1.IsRoot(uNodeIndex))
			{
		// Special case: if no bachelors, consider the
		// root a diff.
			if (!bIsBachelor1[uNodeIndex])
				bIsDiff1[uNodeIndex] = true;
			continue;
			}
		const unsigned uParent = Tree1.GetParent(uNodeIndex);
		if (bIsBachelor1[uParent])
			{
			bIsDiff1[uNodeIndex] = true;
			++uDiffCount;
			}
		}

#if	TRACE
	Log("Tree1:\n");
	Log("Node    Id  Bach  Diff  Name\n");
	Log("----  ----  ----  ----  ----\n");
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		Log("%4u  %4u     %d     %d",
		  n,
		  NodeIndexToId1[n],
		  bIsBachelor1[n],
		  bIsDiff1[n]);
		if (Tree1.IsLeaf(n))
			Log("  %s", Tree1.GetLeafName(n));
		Log("\n");
		}
	Log("\n");
	Log("Tree2:\n");
	Log("Node    Id              Name\n");
	Log("----  ----              ----\n");
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		Log("%4u                  ", n);
		if (Tree2.IsLeaf(n))
			Log("  %s", Tree2.GetLeafName(n));
		Log("\n");
		}
#endif

	Diffs.CreateRooted();
	const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
	const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();

	for (unsigned n = 0; n < uLeafCount; ++n)
		IdToDiffsLeafNodeIndex[n] = uNodeCount;

	BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
	  IdToDiffsLeafNodeIndex);

#if TRACE
	Log("\n");
	Log("Diffs:\n");
	Diffs.LogMe();
	Log("\n");
	Log("IdToDiffsLeafNodeIndex:");
	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		if (n%16 == 0)
			Log("\n");
		else
			Log(" ");
		Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
		}
	Log("\n");
#endif

	for (unsigned n = 0; n < uLeafCount; ++n)
		if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
			Quit("TreeDiffs check 7");

	delete[] NodeIndexToId1;
	delete[] IdToNodeIndex2;

	delete[] bIsBachelor1;
	delete[] bIsDiff1;
	}