Пример #1
0
// Find the nodes at which a horizontal cut at height dMaxHeight severs the
// tree: a node is selected when its own height is at or below the cut while
// its parent's height is strictly above it. The root is never selected.
//
//   tree              must be rooted; Quit() is called otherwise.
//   dMaxHeight        height of the cut.
//   Subtrees          receives the selected node indexes in increasing
//                     node-index order (capacity is the caller's concern).
//   ptruSubtreeCount  receives the number of entries stored in Subtrees.
void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[],
                     unsigned *ptruSubtreeCount)
{
    if (!tree.IsRooted())
        Quit("ClusterByHeight: requires rooted tree");

#if	TRACE
    Log("ClusterByHeight, max height=%g\n", dMaxHeight);
#endif

    const unsigned uTotal = tree.GetNodeCount();
    unsigned uFound = 0;

    for (unsigned uNode = 0; uNode < uTotal; ++uNode)
    {
        // The root has no parent edge that the cut could cross.
        if (tree.IsRoot(uNode))
            continue;

        const unsigned uAbove = tree.GetParent(uNode);
        const double dOwnHeight = tree.GetNodeHeight(uNode);
        const double dAboveHeight = tree.GetNodeHeight(uAbove);

#if	TRACE
        Log("Node %3u  Height %5.2f  ParentHeight %5.2f\n",
            uNode, dOwnHeight, dAboveHeight);
#endif

        // The cut must pass between the parent (above) and this node (below).
        if (!(dAboveHeight > dMaxHeight))
            continue;
        if (!(dOwnHeight <= dMaxHeight))
            continue;

#if	TRACE
        Log("Subtree[%u]=%u\n", uFound, uNode);
#endif
        Subtrees[uFound++] = uNode;
    }

    *ptruSubtreeCount = uFound;
}
Пример #2
0
// Advance an edge enumeration by one step.
// Each edge is reported as (node, parent of node), with nodes taken in the
// tree's depth-first order. On success the pair is stored in
// ES.m_uNodeIndex1 / ES.m_uNodeIndex2 and true is returned.
// Returns false when the enumeration is finished. A tree with at most one
// node has no edges: on the first call both indexes are set to NULL_NEIGHBOR
// and false is returned.
bool PhyEnumEdges(const Tree &tree, PhyEnumEdgeState &ES)
	{
	unsigned uChild = uInsane;

	if (ES.m_bInit)
		{
	// Continue from the node reported by the previous call.
		uChild = tree.NextDepthFirstNode(ES.m_uNodeIndex1);
		if (NULL_NEIGHBOR == uChild)
			return false;

	// The root of a rooted tree is skipped; move on to whatever follows it.
		if (tree.IsRooted() && tree.IsRoot(uChild))
			{
			uChild = tree.NextDepthFirstNode(uChild);
			if (NULL_NEIGHBOR == uChild)
				return false;
			}
		}
	else
		{
	// First call for this state object.
		if (tree.GetNodeCount() <= 1)
			{
			ES.m_uNodeIndex1 = NULL_NEIGHBOR;
			ES.m_uNodeIndex2 = NULL_NEIGHBOR;
			return false;
			}
		uChild = tree.FirstDepthFirstNode();
		ES.m_bInit = true;
		}

	const unsigned uAbove = tree.GetParent(uChild);
	ES.m_uNodeIndex1 = uChild;
	ES.m_uNodeIndex2 = uAbove;
	return true;
	}
Пример #3
0
// Split a tree with N leaves into k families by cutting it with a
// horizontal line at some height.
// Every internal node defines one candidate height, so walking the internal
// nodes enumerates all distinct cuts. Internal nodes are visited in
// decreasing order of height; visiting a node is equivalent to lowering the
// line to just below that node, which removes the node from the list of
// subfamilies and adds its two children instead.
// A parent has to be visited before its children, so zero-length edges may
// need care.
// N is assumed to be small and the implementation is a plain O(N^2) one;
// keeping a height-sorted node list would scale better for large N.
//
//   uSubfamCount     requested number of subfamilies.
//   Subfams          receives one node index per subfamily.
//   ptruSubfamCount  receives the number of entries stored in Subfams.
void ClusterBySubfamCount(const Tree &tree, unsigned uSubfamCount,
                          unsigned Subfams[], unsigned *ptruSubfamCount)
{
    const unsigned uNodes = tree.GetNodeCount();

    // Empty tree: nothing to cluster.
    if (uNodes == 0)
    {
        *ptruSubfamCount = 0;
        return;
    }

    const unsigned uLeaves = (uNodes + 1)/2;

    // At least as many subfamilies requested as there are leaves: report
    // indexes 0 .. uLeaves-1.
    // NOTE(review): this presumably relies on leaves occupying the lowest
    // node indexes -- confirm against Tree.
    if (uLeaves <= uSubfamCount)
    {
        unsigned uSlot = 0;
        while (uSlot < uLeaves)
        {
            Subfams[uSlot] = uSlot;
            ++uSlot;
        }
        *ptruSubfamCount = uLeaves;
        return;
    }

    // Start with a single family rooted at the root, then let each
    // iteration step grow the list by one entry.
    Subfams[0] = tree.GetRootNodeIndex();
    unsigned uCurrentCount = 1;
    while (uCurrentCount < uSubfamCount)
    {
        ClusterBySubfamCount_Iteration(tree, Subfams, uCurrentCount);
        ++uCurrentCount;
    }

    *ptruSubfamCount = uSubfamCount;
}
Пример #4
0
// Rebuild the guide tree from the current alignment and, when the new tree
// differs from the old one at the root, realign the changed parts.
//
//   msa        current alignment; replaced by the realigned one on success.
//   v          input sequences, passed through to RealignDiffsE.
//   tree       current guide tree; replaced by the new tree on success.
//   ProgNodes  progressive-alignment nodes, passed through to RealignDiffsE.
//
// Nothing is done for fewer than three sequences. Throws
// MuscleException("Canceled") if the context reports cancellation; in that
// case msa and tree are left as they were.
void RefineTreeE(MSA &msa, const SeqVect &v, Tree &tree, ProgNode *ProgNodes)
	{
	// Owns the new->old node map so that it is released on every exit path,
	// including exceptions raised by the helpers called below.
	struct NodeMapGuard
		{
		unsigned *m_ptr;
		explicit NodeMapGuard(unsigned *ptr) : m_ptr(ptr) {}
		~NodeMapGuard() { delete[] m_ptr; }
		};

	MuscleContext *ctx = getMuscleContext();
	const unsigned uSeqCount = msa.GetSeqCount();
	if (tree.GetLeafCount() != uSeqCount)
		Quit("Refine tree, tree has different number of nodes");

	if (uSeqCount < 3)
		return;

#if	DEBUG
	ValidateMuscleIds(msa);
	ValidateMuscleIds(tree);
#endif

	const unsigned uNodeCount = tree.GetNodeCount();
	NodeMapGuard MapGuard(new unsigned[uNodeCount]);
	unsigned *uNewNodeIndexToOldNodeIndex = MapGuard.m_ptr;

	// Second-stage tree estimated from the current alignment.
	Tree Tree2;
	TreeFromMSA(msa, Tree2, ctx->params.g_Cluster2, ctx->params.g_Distance2, ctx->params.g_Root2, ctx->params.g_pstrDistMxFileName2);

#if	DEBUG
	ValidateMuscleIds(Tree2);
#endif

	DiffTreesE(Tree2, tree, uNewNodeIndexToOldNodeIndex);

	// Realign only if the root of the new tree is marked as changed.
	unsigned uRoot = Tree2.GetRootNodeIndex();
	if (NODE_CHANGED == uNewNodeIndexToOldNodeIndex[uRoot])
		{
		MSA msa2;
		RealignDiffsE(msa, v, Tree2, tree, uNewNodeIndexToOldNodeIndex, msa2, ProgNodes);
		// Commit the results only when the run was not cancelled midway.
		if (!ctx->isCanceled())
			{
			tree.Copy(Tree2);
			msa.Copy(msa2);
#if	DEBUG
			ValidateMuscleIds(msa2);
#endif
			}
		}

	if (ctx->isCanceled())
		throw MuscleException("Canceled");

	SetCurrentAlignment(msa);
	ProgressStepsDone();
	}
Пример #5
0
static void LogLeafNames(const Tree &tree, unsigned uNodeIndex)
	{
	const unsigned uNodeCount = tree.GetNodeCount();
	unsigned *Leaves = new unsigned[uNodeCount];
	unsigned uLeafCount;
	GetLeaves(tree, uNodeIndex, Leaves, &uLeafCount);
	for (unsigned i = 0; i < uLeafCount; ++i)
		{
		if (i > 0)
			Log(",");
		Log("%s", tree.GetLeafName(Leaves[i]));
		}
	delete[] Leaves;
	}
Пример #6
0
// Fill NodeIndexes[] with the indexes of all non-leaf nodes of the tree,
// sorted by increasing node height (equal heights keep node-index order).
//
//   tree         tree with at least three nodes; Quit() is called otherwise.
//   NodeIndexes  output array; (node count - 1)/2 entries are written.
//
// Quit() is also called if the number of non-leaf nodes is not exactly
// (node count - 1)/2.
void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[])
	{
	const unsigned uNodeCount = tree.GetNodeCount();
	if (uNodeCount < 3)
		Quit("GetInternalNodesInHeightOrder: %u nodes, none are internal",
		  uNodeCount);
	const unsigned uInternalNodeCount = (uNodeCount - 1)/2;
	double *Heights = new double[uInternalNodeCount];

	unsigned uIndex = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (tree.IsLeaf(uNodeIndex))
			continue;
	// Check capacity before storing: a tree with more internal nodes than
	// expected must not be allowed to overrun Heights[] / NodeIndexes[].
		if (uIndex >= uInternalNodeCount)
			Quit("Internal error: GetInternalNodesInHeightOrder");
		NodeIndexes[uIndex] = uNodeIndex;
		Heights[uIndex] = tree.GetNodeHeight(uNodeIndex);
		++uIndex;
		}
	// Too few internal nodes is equally unexpected.
	if (uIndex != uInternalNodeCount)
		Quit("Internal error: GetInternalNodesInHeightOrder");

// Simple but slow bubble sort (probably don't care about speed here).
// Heights[] and NodeIndexes[] are permuted in lock-step; only strictly
// out-of-order neighbours are swapped.
	bool bDone = false;
	while (!bDone)
		{
		bDone = true;
		for (unsigned i = 0; i < uInternalNodeCount - 1; ++i)
			{
			if (Heights[i] > Heights[i+1])
				{
				double dTmp = Heights[i];
				Heights[i] = Heights[i+1];
				Heights[i+1] = dTmp;

				unsigned uTmp = NodeIndexes[i];
				NodeIndexes[i] = NodeIndexes[i+1];
				NodeIndexes[i+1] = uTmp;
				bDone = false;
				}
			}
		}
#if	TRACE
	Log("Internal node index     Height\n");
	Log("-------------------   --------\n");
	//    1234567890123456789  123456789
	for (unsigned n = 0; n < uInternalNodeCount; ++n)
		Log("%19u  %9.3f\n", NodeIndexes[n], Heights[n]);
#endif
	delete[] Heights;
	}
Пример #7
0
// Pick the subfamilies of a tree.
// The first attempt cuts the tree by height (ClusterByHeight), which should
// approximate a minimum percent identity. If that produces more than
// uMaxSubfamCount groups, the tree is instead cut so that exactly the
// maximum allowed number of subfamilies results (ClusterBySubfamCount).
//
//   ptrptrSubfams    receives a new[]-allocated array of node indexes, one
//                    per subfamily; it is not freed here, so the caller is
//                    responsible for delete[].
//   ptruSubfamCount  receives the number of valid entries in that array.
static void GetSubfams(const Tree &tree, double dMaxHeight,
  unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)
	{
	// One slot per tree node.
	unsigned *NodeList = new unsigned[tree.GetNodeCount()];

	unsigned uFound;
	ClusterByHeight(tree, dMaxHeight, NodeList, &uFound);

	const bool bTooMany = (uFound > uMaxSubfamCount);
	if (bTooMany)
		ClusterBySubfamCount(tree, uMaxSubfamCount, NodeList, &uFound);

	*ptruSubfamCount = uFound;
	*ptrptrSubfams = NodeList;
	}
Пример #8
0
// Raise every edge length in the tree that is below dMinEdgeLength up to
// dMinEdgeLength. Edges without a length are left untouched. Each edge is
// examined from both of its end nodes.
void ApplyMinEdgeLength(Tree &tree, double dMinEdgeLength)
	{
	const unsigned uTotal = tree.GetNodeCount();
	for (unsigned uFrom = 0; uFrom != uTotal; ++uFrom)
		{
		const unsigned uDegree = tree.GetNeighborCount(uFrom);
		unsigned uSlot = 0;
		while (uSlot < uDegree)
			{
			const unsigned uTo = tree.GetNeighbor(uFrom, uSlot);
			++uSlot;

			// Only edges that carry a length and are too short get clamped.
			if (tree.HasEdgeLength(uFrom, uTo) &&
			  tree.GetEdgeLength(uFrom, uTo) < dMinEdgeLength)
				tree.SetEdgeLength(uFrom, uTo, dMinEdgeLength);
			}
		}
	}
Пример #9
0
static void LogSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount)
	{
	const unsigned uNodeCount = tree.GetNodeCount();
	Log("%u subfamilies found\n", uSubfamCount);
	Log("Subfam  Sequence\n");
	Log("------  --------\n");
	unsigned *Leaves = new unsigned[uNodeCount];
	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];
		unsigned uLeafCount;
		GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);
		for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)
			Log("%6u  %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));
		Log("\n");
		}
	delete[] Leaves;
	}
Пример #10
0
// Return the next leaf after uPrevNodeIndex, or NULL_NEIGHBOR when there is
// none. With g_bStable set, leaves are taken in increasing node-index order;
// otherwise they are taken in the tree's depth-first order.
static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex)
	{
	unsigned uCandidate = uPrevNodeIndex;

	if (!g_bStable)
		{
		// Follow the depth-first sequence until a leaf or the end is reached.
		do
			uCandidate = tree.NextDepthFirstNode(uCandidate);
		while (NULL_NEIGHBOR != uCandidate && !tree.IsLeaf(uCandidate));
		return uCandidate;
		}

	// Stable order: linear scan over the node indexes that follow.
	const unsigned uLimit = tree.GetNodeCount();
	for (++uCandidate; uCandidate < uLimit; ++uCandidate)
		{
		if (tree.IsLeaf(uCandidate))
			return uCandidate;
		}
	return NULL_NEIGHBOR;
	}
Пример #11
0
// Manual test driver for PhyEnumBiParts().
// Reads a tree from the hard-coded path c:\tmp\test.phy, logs it, then logs
// the enumeration state and the two leaf sets of every bipartition until
// PhyEnumBiParts() returns false. Output goes to c:\tmp\lobster.log.
void TestBiPart()
	{
	SetListFileName("c:\\tmp\\lobster.log", false);
	Tree tree;
	TextFile fileIn("c:\\tmp\\test.phy");
	tree.FromFile(fileIn);
	tree.LogMe();

	// Each side of a bipartition is given room for one entry per tree node.
	const unsigned uNodeCount = tree.GetNodeCount();
	unsigned *Leaves1 = new unsigned[uNodeCount];
	unsigned *Leaves2 = new unsigned[uNodeCount];

	PhyEnumEdgeState ES;
	for (;;)
		{
		unsigned uCount1 = uInsane;
		unsigned uCount2 = uInsane;
		bool bOk = PhyEnumBiParts(tree, ES, Leaves1, &uCount1, Leaves2, &uCount2);
		Log("PEBP=%d ES.Init=%d ES.ni1=%d ES.ni2=%d\n",
		  bOk,
		  ES.m_bInit,
		  ES.m_uNodeIndex1,
		  ES.m_uNodeIndex2);
		if (!bOk)
			break;
		Log("\n");
		Log("Part1: ");
		for (unsigned n = 0; n < uCount1; ++n)
			Log(" %d(%s)", Leaves1[n], tree.GetLeafName(Leaves1[n]));
		Log("\n");
		Log("Part2: ");
		for (unsigned n = 0; n < uCount2; ++n)
			Log(" %d(%s)", Leaves2[n], tree.GetLeafName(Leaves2[n]));
		Log("\n");
		}

	// Release the scratch arrays (previously leaked).
	delete[] Leaves1;
	delete[] Leaves2;
	}
Пример #12
0
// Compute per-leaf sequence weights from a rooted tree.
//
// Every non-root node gets a "strength": the length of the edge to its
// parent divided by the number of leaves below the node (as reported by
// CountLeaves). The raw weight of a leaf is the sum of the strengths on the
// path from the leaf up to, but excluding, the root. Raw weights below
// 0.0001 are replaced by 1.0, and the result is passed to Normalize().
//
//   tree     guide tree; must be rooted when it has three or more leaves
//            (Quit() is called otherwise).
//   Weights  output, one entry per leaf, indexed by leaf index.
//
// Trees with 0, 1 or 2 leaves are handled directly: nothing, {1.0} and
// {0.5, 0.5} respectively.
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
	{
#if	TRACE
	Log("CalcClustalWWeights\n");
	tree.LogMe();
#endif

	const unsigned uLeafCount = tree.GetLeafCount();
	switch (uLeafCount)
		{
	case 0:
		return;

	case 1:
		Weights[0] = (WEIGHT) 1.0;
		return;

	case 2:
		Weights[0] = (WEIGHT) 0.5;
		Weights[1] = (WEIGHT) 0.5;
		return;

	default:
		break;
		}

	if (!tree.IsRooted())
		Quit("CalcClustalWWeights requires rooted tree");

	const unsigned uNodeCount = tree.GetNodeCount();

	// Per-node leaf counts, zeroed before CountLeaves fills them in.
	unsigned *LeafTally = new unsigned[uNodeCount];
	for (unsigned i = 0; i < uNodeCount; ++i)
		LeafTally[i] = 0;

	const unsigned uRoot = tree.GetRootNodeIndex();
	const unsigned uTallyAtRoot = CountLeaves(tree, uRoot, LeafTally);
	if (uTallyAtRoot != uLeafCount)
		Quit("WeightsFromTreee: Internal error, root count %u %u",
		  uTallyAtRoot, uLeafCount);

#if	TRACE
	Log("Node  Leaves    Length  Strength\n");
	Log("----  ------  --------  --------\n");
	//    1234  123456  12345678  12345678
#endif

	// Strength of each node; the root has no parent edge and gets zero.
	double *Strengths = new double[uNodeCount];
	for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)
		{
		if (tree.IsRoot(uNode))
			Strengths[uNode] = 0.0;
		else
			{
			const unsigned uAbove = tree.GetParent(uNode);
			const double dEdge = tree.GetEdgeLength(uNode, uAbove);
			const unsigned uBelow = LeafTally[uNode];
			const double dShare = dEdge / (double) uBelow;
			Strengths[uNode] = dShare;
#if	TRACE
			Log("%4u  %6u  %8g  %8g\n", uNode, uBelow, dEdge, dShare);
#endif
			}
		}

#if	TRACE
	Log("\n");
	Log("                 Seq  Path..Weight\n");
	Log("--------------------  ------------\n");
#endif
	for (unsigned uLeaf = 0; uLeaf < uLeafCount; ++uLeaf)
		{
		const unsigned uLeafNode = tree.LeafIndexToNodeIndex(uLeaf);
#if	TRACE
		Log("%20.20s  %4u ", tree.GetLeafName(uLeafNode), uLeafNode);
#endif
		if (!tree.IsLeaf(uLeafNode))
			Quit("CalcClustalWWeights: leaf");

		// Accumulate strengths while climbing from the leaf towards the root.
		double dPathSum = 0;
		for (unsigned uWalk = uLeafNode; !tree.IsRoot(uWalk); )
			{
			dPathSum += Strengths[uWalk];
			uWalk = tree.GetParent(uWalk);
#if	TRACE
			Log("->%u(%g)", uWalk, Strengths[uWalk]);
#endif
			}

		// A (near-)zero path sum is replaced by one.
		if (dPathSum < 0.0001)
			{
#if	TRACE
			Log("zero->one");
#endif
			dPathSum = 1.0;
			}
		Weights[uLeaf] = (WEIGHT) dPathSum;
#if	TRACE
		Log(" = %g\n", dPathSum);
#endif
		}

	delete[] Strengths;
	delete[] LeafTally;

	Normalize(Weights, uLeafCount);
	}
Пример #13
0
// Compare two rooted trees over the same leaf ids and build a "diffs" tree.
//
// Terminology used below: a node of Tree1 is "married" when a node of Tree2
// has been identified as its counterpart (its "spouse"); otherwise it is a
// "bachelor". Leaves are matched by leaf id. An internal node of Tree1 is
// married when both of its children are married and their spouses share the
// same parent in Tree2; that parent becomes its spouse.
//
// A married node is flagged as a "diff" when its parent is a bachelor, or
// when it is the root. The flags are handed to BuildDiffs(), which fills
// Diffs and IdToDiffsLeafNodeIndex[].
//
//   Tree1, Tree2            rooted trees with equal node counts; Quit() is
//                           called otherwise.
//   Diffs                   output tree, (re)created via CreateRooted().
//   IdToDiffsLeafNodeIndex  output, indexed by leaf id (0 .. leaf count - 1);
//                           every entry must have been set by BuildDiffs()
//                           or Quit() is called.
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
  unsigned IdToDiffsLeafNodeIndex[])
	{
#if	TRACE
	Log("Tree1:\n");
	Tree1.LogMe();
	Log("\n");
	Log("Tree2:\n");
	Tree2.LogMe();
#endif

	if (!Tree1.IsRooted() || !Tree2.IsRooted())
		Quit("DiffTrees: requires rooted trees");

	const unsigned uNodeCount = Tree1.GetNodeCount();
	const unsigned uNodeCount2 = Tree2.GetNodeCount();
	
	const unsigned uLeafCount = Tree1.GetLeafCount();
	// Only read by the assert below, i.e. unused when asserts are compiled out.
	const unsigned uLeafCount2 = Tree2.GetLeafCount();
	assert(uLeafCount == uLeafCount2);

	if (uNodeCount != uNodeCount2)
		Quit("DiffTrees: different node counts");

// Allocate tables so we can convert tree node index to
// and from the unique id with a O(1) lookup.
	unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
	unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];

	bool *bIsBachelor1 = new bool[uNodeCount];
	bool *bIsDiff1 = new bool[uNodeCount];

	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
	// uNodeCount doubles as the "not set" marker in both lookup tables.
		NodeIndexToId1[uNodeIndex] = uNodeCount;
		bIsBachelor1[uNodeIndex] = false;
		bIsDiff1[uNodeIndex] = false;

	// Use uNodeCount as value meaning "not set".
		IdToNodeIndex2[uNodeIndex] = uNodeCount;
		}

// Initialize node index <-> id lookup tables
// (leaves only; internal nodes receive ids in the main loop below).
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Tree1.IsLeaf(uNodeIndex))
			{
			const unsigned uId = Tree1.GetLeafId(uNodeIndex);
			if (uId >= uNodeCount)
				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
			NodeIndexToId1[uNodeIndex] = uId;
			}

		if (Tree2.IsLeaf(uNodeIndex))
			{
			const unsigned uId = Tree2.GetLeafId(uNodeIndex);
			if (uId >= uNodeCount)
				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
			IdToNodeIndex2[uId] = uNodeIndex;
			}
		}

// Validity check. Every id in 0 .. (leaf count - 1) must have been
// registered for Tree2 above; this catches duplicate leaf ids, which the
// id<N range check does not rule out.
// NOTE(review): the original comment said this verifies Tree1's ids, but
// the table inspected here is filled from Tree2 only -- confirm intent.
	for (unsigned uId = 0; uId < uLeafCount; ++uId)
		{
		unsigned uNodeIndex2 = IdToNodeIndex2[uId];
		if (uNodeCount == uNodeIndex2)
			Quit("DiffTrees, check 2");
		}

// Ids assigned to internal nodes are N, N+1 ...
// An internal node id uniquely identifies a set
// of two or more leaves.
	unsigned uInternalNodeId = uLeafCount;

// Depth-first traversal of tree.
// The order guarantees that a node is visited before
// its parent is visited.
	for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex1;
	  uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
		{
#if	TRACE
		Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
		  uNodeIndex1,
		  Tree1.IsLeaf(uNodeIndex1),
		  bIsBachelor1[uNodeIndex1]);
#endif

	// Leaves are trivial; nothing to do.
	// Nodes already flagged as bachelors are skipped as well.
		if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
			continue;

	// If either child is a bachelor, flag
	// this node as a bachelor and continue.
		unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
		if (bIsBachelor1[uLeft1])
			{
			bIsBachelor1[uNodeIndex1] = true;
			continue;
			}

		unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
		if (bIsBachelor1[uRight1])
			{
			bIsBachelor1[uNodeIndex1] = true;
			continue;
			}

	// Both children are married.
	// Married nodes are guaranteed to have an id.
		unsigned uIdLeft = NodeIndexToId1[uLeft1];
		unsigned uIdRight = NodeIndexToId1[uRight1];

	// Sanity check of that guarantee.
		if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
			Quit("DiffTrees, check 5");

	// uLeft2 is the spouse of uLeft1, and similarly for uRight2.
		unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
		unsigned uRight2 = IdToNodeIndex2[uIdRight];

		if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
			Quit("DiffTrees, check 6");

	// If the spouses of uLeft1 and uRight1 have the same
	// parent, then this parent is the spouse of uNodeIndex1.
	// Otherwise, uNodeIndex1 is a diff.
		unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
		unsigned uParentRight2 = Tree2.GetParent(uRight2);

#if	TRACE
		Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
		  uLeft1,
		  uRight1,
		  uLeft2,
		  uRight2,
		  uParentLeft2,
		  uParentRight2);
#endif

		if (uParentLeft2 == uParentRight2)
			{
		// Married: hand out the next internal id and record the spouse.
			NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
			IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
			++uInternalNodeId;
			}
		else
			bIsBachelor1[uNodeIndex1] = true;
		}

	// Flag the diffs: married nodes whose parent is a bachelor, plus a
	// married root.
	// NOTE(review): uDiffCount is incremented but never read afterwards in
	// this function.
	unsigned uDiffCount = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (bIsBachelor1[uNodeIndex])
			continue;
		if (Tree1.IsRoot(uNodeIndex))
			{
		// Special case: if no bachelors, consider the
		// root a diff.
		// (Bachelors were skipped just above, so the test below is always
		// true at this point.)
			if (!bIsBachelor1[uNodeIndex])
				bIsDiff1[uNodeIndex] = true;
			continue;
			}
		const unsigned uParent = Tree1.GetParent(uNodeIndex);
		if (bIsBachelor1[uParent])
			{
			bIsDiff1[uNodeIndex] = true;
			++uDiffCount;
			}
		}

#if	TRACE
	Log("Tree1:\n");
	Log("Node    Id  Bach  Diff  Name\n");
	Log("----  ----  ----  ----  ----\n");
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		Log("%4u  %4u     %d     %d",
		  n,
		  NodeIndexToId1[n],
		  bIsBachelor1[n],
		  bIsDiff1[n]);
		if (Tree1.IsLeaf(n))
			Log("  %s", Tree1.GetLeafName(n));
		Log("\n");
		}
	Log("\n");
	Log("Tree2:\n");
	Log("Node    Id              Name\n");
	Log("----  ----              ----\n");
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		Log("%4u                  ", n);
		if (Tree2.IsLeaf(n))
			Log("  %s", Tree2.GetLeafName(n));
		Log("\n");
		}
#endif

	// Build the output tree starting from the roots of Tree1 and Diffs.
	Diffs.CreateRooted();
	const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
	const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();

	// Mark every leaf id as "not set" so that omissions can be detected.
	for (unsigned n = 0; n < uLeafCount; ++n)
		IdToDiffsLeafNodeIndex[n] = uNodeCount;

	BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
	  IdToDiffsLeafNodeIndex);

#if TRACE
	Log("\n");
	Log("Diffs:\n");
	Diffs.LogMe();
	Log("\n");
	Log("IdToDiffsLeafNodeIndex:");
	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		if (n%16 == 0)
			Log("\n");
		else
			Log(" ");
		Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
		}
	Log("\n");
#endif

	// Every leaf id must have been placed somewhere in Diffs.
	for (unsigned n = 0; n < uLeafCount; ++n)
		if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
			Quit("TreeDiffs check 7");

	delete[] NodeIndexToId1;
	delete[] IdToNodeIndex2;

	delete[] bIsBachelor1;
	delete[] bIsDiff1;
	}
Пример #14
0
void DoMuscle()
	{
	SetOutputFileName(g_pstrOutFileName.get());
	SetInputFileName(g_pstrInFileName.get());

	SetMaxIters(g_uMaxIters.get());
	SetSeqWeightMethod(g_SeqWeight1.get());

	TextFile fileIn(g_pstrInFileName.get());
	SeqVect v;
	v.FromFASTAFile(fileIn);
	const unsigned uSeqCount = v.Length();

	if (0 == uSeqCount)
		Quit("No sequences in input file");

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType.get())
		{
	case SEQTYPE_Auto:
		Alpha = v.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid seq type");
		}
	SetAlpha(Alpha);
	v.FixAlpha();

//
// AED 21/12/06: Moved matrix loading code inside the PP param function so it gets called for all alignment types
//
	SetPPScore();


	unsigned uMaxL = 0;
	unsigned uTotL = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned L = v.GetSeq(uSeqIndex).Length();
		uTotL += L;
		if (L > uMaxL)
			uMaxL = L;
		}

	SetIter(1);
	g_bDiags.get() = g_bDiags1.get();
	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

	SetMuscleSeqVect(v);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		v.SetSeqId(uSeqIndex, uSeqIndex);

	if (0 == uSeqCount)
		Quit("Input file '%s' has no sequences", g_pstrInFileName.get());
	if (1 == uSeqCount)
		{
		TextFile fileOut(g_pstrOutFileName.get(), true);
		v.ToFile(fileOut);
		return;
		}

	if (uSeqCount > 1)
		MHackStart(v);

// First iteration
	Tree GuideTree;
	if (0 != g_pstrUseTreeFileName.get())
		{
	// Discourage users...
		if (!g_bUseTreeNoWarn.get())
			fprintf(stderr, g_strUseTreeWarning);

	// Read tree from file
		TextFile TreeFile(g_pstrUseTreeFileName.get());
		GuideTree.FromFile(TreeFile);

	// Make sure tree is rooted
		if (!GuideTree.IsRooted())
			Quit("User tree must be rooted");

		if (GuideTree.GetLeafCount() != uSeqCount)
			Quit("User tree does not match input sequences");

		const unsigned uNodeCount = GuideTree.GetNodeCount();
		for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
			{
			if (!GuideTree.IsLeaf(uNodeIndex))
				continue;
			const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
			unsigned uSeqIndex;
			bool SeqFound = v.FindName(LeafName, &uSeqIndex);
			if (!SeqFound)
				Quit("Label %s in tree does not match sequences", LeafName);
			unsigned uId = v.GetSeqIdFromName(LeafName);
			GuideTree.SetLeafId(uNodeIndex, uId);
			}
		}
	else
		TreeFromSeqVect(v, GuideTree, g_Cluster1.get(), g_Distance1.get(), g_Root1.get(),
		  g_pstrDistMxFileName1.get());

	const char *Tree1 = ValueOpt("Tree1");
	if (0 != Tree1)
		{
		TextFile f(Tree1, true);
		GuideTree.ToFile(f);
		if (g_bClusterOnly.get())
			return;
		}

	SetMuscleTree(GuideTree);
	ValidateMuscleIds(GuideTree);

	MSA msa;
	ProgNode *ProgNodes = 0;
	if (g_bLow.get())
		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
	else
		ProgressiveAlign(v, GuideTree, msa);
	SetCurrentAlignment(msa);

	if (0 != g_pstrComputeWeightsFileName.get())
		{
		extern void OutWeights(const char *FileName, const MSA &msa);
		SetMSAWeightsMuscle(msa);
		OutWeights(g_pstrComputeWeightsFileName.get(), msa);
		return;
		}

	ValidateMuscleIds(msa);

	if (1 == g_uMaxIters.get() || 2 == uSeqCount)
		{
		//TextFile fileOut(g_pstrOutFileName.get(), true);
		//MHackEnd(msa);
		//msa.ToFile(fileOut);
		MuscleOutput(msa);
		return;
		}

	if (0 == g_pstrUseTreeFileName.get())
		{
		g_bDiags.get() = g_bDiags2.get();
		SetIter(2);

		if (g_bLow.get())
			{
			if (0 != g_uMaxTreeRefineIters.get())
				RefineTreeE(msa, v, GuideTree, ProgNodes);
			}
		else
			RefineTree(msa, GuideTree);

		const char *Tree2 = ValueOpt("Tree2");
		if (0 != Tree2)
			{
			TextFile f(Tree2, true);
			GuideTree.ToFile(f);
			}
		}

	SetSeqWeightMethod(g_SeqWeight2.get());
	SetMuscleTree(GuideTree);

	if (g_bAnchors.get())
		RefineVert(msa, GuideTree, g_uMaxIters.get() - 2);
	else
		RefineHoriz(msa, GuideTree, g_uMaxIters.get() - 2, false, false);

#if	0
// Refining by subfamilies is disabled as it didn't give better
// results. I tried doing this before and after RefineHoriz.
// Should get back to this as it seems like this should work.
	RefineSubfams(msa, GuideTree, g_uMaxIters.get() - 2);
#endif

	ValidateMuscleIds(msa);
	ValidateMuscleIds(GuideTree);

	//TextFile fileOut(g_pstrOutFileName.get(), true);
	//MHackEnd(msa);
	//msa.ToFile(fileOut);
	MuscleOutput(msa);
	}
Пример #15
0
// Map the nodes of NewTree onto the nodes of OldTree.
//
// Leaves are matched through their leaf ids. An internal node of NewTree is
// mapped to an OldTree node when both of its children are mapped and their
// OldTree counterparts share the same parent; that parent is the match.
// Every other internal node is marked NODE_CHANGED.
//
//   NewTree, OldTree            rooted trees with equal node and leaf
//                               counts; Quit() is called otherwise.
//   NewNodeIndexToOldNodeIndex  output, one entry per NewTree node: the
//                               matching OldTree node index or NODE_CHANGED.
//
// Leaf ids must be smaller than the node count and every NewTree leaf id
// must also occur in OldTree; Quit() is called otherwise.
void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
  unsigned NewNodeIndexToOldNodeIndex[])
	{
#if	TRACE
	Log("DiffTreesE NewTree:\n");
	NewTree.LogMe();
	Log("\n");
	Log("OldTree:\n");
	OldTree.LogMe();
#endif

	if (!NewTree.IsRooted() || !OldTree.IsRooted())
		Quit("DiffTrees: requires rooted trees");

	const unsigned uNodeCount = NewTree.GetNodeCount();
	const unsigned uOldNodeCount = OldTree.GetNodeCount();
	const unsigned uLeafCount = NewTree.GetLeafCount();
	const unsigned uOldLeafCount = OldTree.GetLeafCount();
	if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
		Quit("DiffTreesE: different node counts");

	{
// Leaf id -> OldTree node index. uNodeCount marks "no leaf with this id",
// so lookups of unknown ids are detected instead of reading garbage.
	unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];
	for (unsigned uId = 0; uId < uNodeCount; ++uId)
		IdToOldNodeIndex[uId] = uNodeCount;

	for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
		{
		if (!OldTree.IsLeaf(uOldNodeIndex))
			continue;
		const unsigned uId = OldTree.GetLeafId(uOldNodeIndex);
	// Guard the table write in release builds too.
		if (uId >= uNodeCount)
			Quit("DiffTreesE: old tree leaf id %u out of range", uId);
		IdToOldNodeIndex[uId] = uOldNodeIndex;
		}

// Initialize NewNodeIndexToOldNodeIndex[]
// All internal nodes are marked as changed, but may be updated later.
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		if (NewTree.IsLeaf(uNewNodeIndex))
			{
			unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
			assert(uId < uLeafCount);
			if (uId >= uNodeCount)
				Quit("DiffTreesE: new tree leaf id %u out of range", uId);

			unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
			if (uOldNodeIndex >= uNodeCount)
				Quit("DiffTreesE: leaf id %u not found in old tree", uId);

			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
			}
		else
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
		}
	delete[] IdToOldNodeIndex;
	}

// Depth-first traversal of tree.
// The order guarantees that a node is visited before
// its parent is visited.
	for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNewNodeIndex;
	  uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
		{
		if (NewTree.IsLeaf(uNewNodeIndex))
			continue;

	// If either child is changed, flag this node as changed and continue.
	// (The node already carries NODE_CHANGED from the initialisation above;
	// the assignment makes the intent explicit and targets the node itself
	// rather than the child.)
		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
		unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
		if (NODE_CHANGED == uOldLeft)
			{
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
			continue;
			}

		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
		unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
		if (NODE_CHANGED == uOldRight)
			{
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
			continue;
			}

	// Both children have counterparts: the node is unchanged exactly when
	// those counterparts are siblings in the old tree.
		unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
		unsigned uOldParentRight = OldTree.GetParent(uOldRight);
		if (uOldParentLeft == uOldParentRight)
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
		else
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
		}

#if TRACE
	{
	Log("NewToOld ");
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		Log(" [%3u]=", uNewNodeIndex);
		if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
			Log("  X");
		else
			Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
		if ((uNewNodeIndex+1)%8 == 0)
			Log("\n         ");
		}
	Log("\n");
	}
#endif

#if	DEBUG
	{
// Self-check: mapped leaves must carry the same id in both trees, and the
// children of a mapped internal node must map onto the children of its
// counterpart (in either left/right order).
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
		if (NewTree.IsLeaf(uNewNodeIndex))
			{
			if (uOld >= uNodeCount)
				{
				Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
				  uNewNodeIndex, uOld, uNodeCount);
				Quit("Diff check failed");
				}
			unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
			unsigned uIdOld = OldTree.GetLeafId(uOld);
			if (uIdNew != uIdOld)
				{
				Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
				  uNewNodeIndex, uOld, uIdNew, uIdOld);
				Quit("Diff check failed");
				}
			continue;
			}

		if (NODE_CHANGED == uOld)
			continue;

		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);

		unsigned uOldLeft = OldTree.GetLeft(uOld);
		unsigned uOldRight = OldTree.GetRight(uOld);

		unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
		unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];

		bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
		bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
		if (!bSameNotRotated && !bSameRotated)
			{
			Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
			Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
			Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
			Quit("Diff check failed");
			}
		}
	}
#endif
	}
Пример #16
0
// Top-level driver (variant that attaches a CompositeVect to the alignment):
// read the input sequences, optionally load a user score matrix, build (or
// load) a guide tree, run the progressive alignment and, depending on the
// options, tree and alignment refinement, then write the result via
// MuscleOutput().
//
//   CVLocation  passed to msa.SetCompositeVector() before aligning.
//
// All other settings come from global options (g_*, ValueOpt). Early exits:
//   - no sequences:          Quit()
//   - exactly one sequence:  the input is written out unchanged
//   - "Tree1" given together with the cluster-only option: stop after
//     writing the first tree
//   - compute-weights file name set: write weights and stop
//   - max iterations == 1 or exactly two sequences: output after the
//     progressive alignment
void DoMuscle(CompositeVect*CVLocation)
	{
	SetOutputFileName(g_pstrOutFileName);
	SetInputFileName(g_pstrInFileName);

	SetMaxIters(g_uMaxIters);
	SetSeqWeightMethod(g_SeqWeight1);

	TextFile fileIn(g_pstrInFileName);
	SeqVect v;
	v.FromFASTAFile(fileIn);
	const unsigned uSeqCount = v.Length();

	if (0 == uSeqCount)
		Quit("No sequences in input file");

	// Choose the alphabet: explicit option, or guessed from the sequences.
	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType)
		{
	case SEQTYPE_Auto:
		Alpha = v.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid seq type");
		}
	SetAlpha(Alpha);
	v.FixAlpha();

	// Optional user-supplied score matrix. If MUSCLE_MXPATH is set, the
	// file name is looked up relative to that directory.
	PTR_SCOREMATRIX UserMatrix = 0;
	if (0 != g_pstrMatrixFileName)
		{
		const char *FileName = g_pstrMatrixFileName;
		char *NewFileName = 0;
		const char *Path = getenv("MUSCLE_MXPATH");
		if (Path != 0)
			{
		// Path + '/' + FileName + terminating NUL.
			size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
			NewFileName = new char[n];
			sprintf(NewFileName, "%s/%s", Path, FileName);
			FileName = NewFileName;
			}
			{
		// Inner scope so that File is destroyed before the path buffer it
		// was opened with is released.
			TextFile File(FileName);
			UserMatrix = ReadMx(File);
			}
		// Release the composed path (previously leaked); no-op when unset.
		delete[] NewFileName;
		g_Alpha = ALPHA_Amino;
		g_PPScore = PPSCORE_SP;
		}

	SetPPScore();

	// Installed after SetPPScore() has run.
	if (0 != UserMatrix)
		g_ptrScoreMatrix = UserMatrix;

	// Longest and total sequence length, for the statistics below.
	unsigned uMaxL = 0;
	unsigned uTotL = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned L = v.GetSeq(uSeqIndex).Length();
		uTotL += L;
		if (L > uMaxL)
			uMaxL = L;
		}

	SetIter(1);
	g_bDiags = g_bDiags1;
	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

	SetMuscleSeqVect(v);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		v.SetSeqId(uSeqIndex, uSeqIndex);

	// (The empty-input case was already rejected right after reading.)
	if (1 == uSeqCount)
		{
		TextFile fileOut(g_pstrOutFileName, true);
		v.ToFile(fileOut);
		return;
		}

	// At least two sequences from here on.
	MHackStart(v);

// First iteration
	Tree GuideTree;
	if (0 != g_pstrUseTreeFileName)
		{
	// Discourage users...
		if (!g_bUseTreeNoWarn)
			fprintf(stderr, "%s", g_strUseTreeWarning);

	// Read tree from file
		TextFile TreeFile(g_pstrUseTreeFileName);
		GuideTree.FromFile(TreeFile);

	// Make sure tree is rooted
		if (!GuideTree.IsRooted())
			Quit("User tree must be rooted");

		if (GuideTree.GetLeafCount() != uSeqCount)
			Quit("User tree does not match input sequences");

	// Attach the sequence id to every leaf, matching by label.
		const unsigned uNodeCount = GuideTree.GetNodeCount();
		for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
			{
			if (!GuideTree.IsLeaf(uNodeIndex))
				continue;
			const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
			unsigned uSeqIndex;
			bool SeqFound = v.FindName(LeafName, &uSeqIndex);
			if (!SeqFound)
				Quit("Label %s in tree does not match sequences", LeafName);
			unsigned uId = v.GetSeqIdFromName(LeafName);
			GuideTree.SetLeafId(uNodeIndex, uId);
			}
		}
	else
		TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
		  g_pstrDistMxFileName1);

	// Optionally save the first guide tree.
	const char *Tree1 = ValueOpt("Tree1");
	if (0 != Tree1)
		{
		TextFile f(Tree1, true);
		GuideTree.ToFile(f);
		if (g_bClusterOnly)
			return;
		}

	SetMuscleTree(GuideTree);
	ValidateMuscleIds(GuideTree);

	// Progressive alignment; the low-memory variant also returns the
	// ProgNode array needed later by RefineTreeE.
	MSA msa;
	msa.SetCompositeVector(CVLocation);
	ProgNode *ProgNodes = 0;
	if (g_bLow)
		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
	else
		ProgressiveAlign(v, GuideTree, msa);
	SetCurrentAlignment(msa);

	if (0 != g_pstrComputeWeightsFileName)
		{
		extern void OutWeights(const char *FileName, const MSA &msa);
		SetMSAWeightsMuscle(msa);
		OutWeights(g_pstrComputeWeightsFileName, msa);
		return;
		}

	ValidateMuscleIds(msa);

	// No refinement requested, or nothing to refine with only two sequences.
	if (1 == g_uMaxIters || 2 == uSeqCount)
		{
		MuscleOutput(msa);
		return;
		}

	// Second iteration: tree refinement, skipped for user-supplied trees.
	if (0 == g_pstrUseTreeFileName)
		{
		g_bDiags = g_bDiags2;
		SetIter(2);

		if (g_bLow)
			{
			if (0 != g_uMaxTreeRefineIters)
				RefineTreeE(msa, v, GuideTree, ProgNodes);
			}
		else
			RefineTree(msa, GuideTree);

		const char *Tree2 = ValueOpt("Tree2");
		if (0 != Tree2)
			{
			TextFile f(Tree2, true);
			GuideTree.ToFile(f);
			}
		}

	SetSeqWeightMethod(g_SeqWeight2);
	SetMuscleTree(GuideTree);

	// Remaining iterations: alignment refinement.
	if (g_bAnchors)
		RefineVert(msa, GuideTree, g_uMaxIters - 2);
	else
		RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);

#if	0
// Refining by subfamilies is disabled as it didn't give better
// results. I tried doing this before and after RefineHoriz.
// Should get back to this as it seems like this should work.
	RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
#endif

	ValidateMuscleIds(msa);
	ValidateMuscleIds(GuideTree);

	MuscleOutput(msa);
	}
Пример #17
0
static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
	{
	const unsigned uNodeCount = tree.GetNodeCount();

	bool *Ready = new bool[uNodeCount];
	MSA **MSAs = new MSA *[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		Ready[uNodeIndex] = false;
		MSAs[uNodeIndex] = 0;
		}

	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uNodeIndex = Subfams[uSubfamIndex];
		Ready[uNodeIndex] = true;
		MSA *ptrMSA = new MSA;
	// TODO: Wasteful copy, needs re-design
		ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
		MSAs[uNodeIndex] = ptrMSA;
		}

	for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex;
	  uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
		{
		if (tree.IsLeaf(uNodeIndex))
			continue;

		unsigned uRight = tree.GetRight(uNodeIndex);
		unsigned uLeft = tree.GetLeft(uNodeIndex);
		if (!Ready[uRight] || !Ready[uLeft])
			continue;

		MSA *ptrLeft = MSAs[uLeft];
		MSA *ptrRight = MSAs[uRight];
		assert(ptrLeft != 0 && ptrRight != 0);

		MSA *ptrParent = new MSA;

		PWPath Path;
		AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);

		MSAs[uNodeIndex] = ptrParent;
		Ready[uNodeIndex] = true;
		Ready[uLeft] = false;
		Ready[uRight] = false;

		delete MSAs[uLeft];
		delete MSAs[uRight];
		MSAs[uLeft] = 0;
		MSAs[uRight] = 0;
		}

#if	DEBUG
	{
	unsigned uReadyCount = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Ready[uNodeIndex])
			{
			assert(tree.IsRoot(uNodeIndex));
			++uReadyCount;
			assert(0 != MSAs[uNodeIndex]);
			}
		else
			assert(0 == MSAs[uNodeIndex]);
		}
	assert(1 == uReadyCount);
	}
#endif

	const unsigned uRoot = tree.GetRootNodeIndex();
	MSA *ptrRootAlignment = MSAs[uRoot];

	msa.Copy(*ptrRootAlignment);

	delete ptrRootAlignment;
    delete[] Ready;

#if	TRACE
	Log("After refine subfamilies, root alignment=\n");
	msa.LogMe();
#endif
	}
Пример #18
0
// Refine an alignment one subfamily at a time.
//
//	msa     Alignment to refine; replaced only if some subfamily changed.
//	tree    Guide tree used to pick the subfamilies (GetSubfams).
//	uIters  Iteration count handed to RefineVert / RefineHoriz.
//
// Returns true if refinement changed at least one subfamily alignment, in
// which case msa is rebuilt from the subfamilies by ProgressiveAlignSubfams.
// Returns false immediately when msa has fewer than three sequences.
bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	if (uSeqCount < 3)
		return false;

	const double dMaxHeight = 0.6;
	const unsigned uMaxSubfamCount = 16;

// Subfams is handed back by GetSubfams and released below with delete[].
	unsigned *Subfams;
	unsigned uSubfamCount;
	GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
	assert(uSubfamCount <= uSeqCount);

	if (g_bVerbose.get())
		LogSubfams(tree, Subfams, uSubfamCount);

	MSA *SubfamMSAs = new MSA[uSubfamCount];
// Scratch buffers sized for the largest possible subfamily (all sequences).
	unsigned *Leaves = new unsigned[uSeqCount];
	unsigned *Ids = new unsigned[uSeqCount];

	bool bAnyChanges = false;
	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uSubfam = Subfams[uSubfamIndex];
		unsigned uLeafCount;
		GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
		assert(uLeafCount <= uSeqCount);

		LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);

		MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
		MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
		DeleteGappedCols(msaSubfam);

#if	TRACE
		Log("Subfam %u MSA=\n", uSubfamIndex);
		msaSubfam.LogMe();
#endif

	// Subfamilies of one or two sequences are extracted but not refined.
		if (msaSubfam.GetSeqCount() <= 2)
			continue;

	// TODO /////////////////////////////////////////
	// Try using existing tree, may actually hurt to
	// re-estimate, may also be a waste of CPU & mem.
	/////////////////////////////////////////////////
		Tree SubfamTree;
		TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get());

		bool bAnyChangesThisSubfam;
		if (g_bAnchors.get())
			bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
		else
			bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
#if	TRACE
		Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
#endif
		if (bAnyChangesThisSubfam)
			bAnyChanges = true;
		}

	if (bAnyChanges)
		ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);

	delete[] Ids;
	delete[] Leaves;
	delete[] Subfams;
	delete[] SubfamMSAs;

	return bAnyChanges;
	}
Пример #19
0
void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
  MSA &a)
	{
#if	TRACE
	Log("MakeRootMSA Tree=");
	GuideTree.LogMe();
#endif
	const unsigned uSeqCount = v.GetSeqCount();
	unsigned uColCount = uInsane;
	unsigned uSeqIndex = 0;
	const unsigned uTreeNodeCount = GuideTree.GetNodeCount();
	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
	const unsigned uRootColCount = RootPath.GetEdgeCount();
	const unsigned uEstringSize = uRootColCount + 1;
	short *Estring1 = new short[uEstringSize];
	short *Estring2 = new short[uEstringSize];
	SetProgressDesc("Root alignment");

	unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
	do
		{
		Progress(uSeqIndex, uSeqCount);

		unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
		const Seq &s = *(v[uId]);

		Seq sRootE;
		short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
		  Estring1, Estring2);
		Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);

#if	VALIDATE
		Seq sRoot;
		MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
		if (!sRoot.Eq(sRootE))
			{
			Log("sRoot=");
			sRoot.LogMe();
			Log("sRootE=");
			sRootE.LogMe();
			Quit("Root seqs differ");
			}
#if	TRACE
		Log("MakeRootSeq=\n");
		sRoot.LogMe();
#endif
#endif

		if (uInsane == uColCount)
			{
			uColCount = sRootE.Length();
			a.SetSize(uSeqCount, uColCount);
			}
		else
			{
			assert(uColCount == sRootE.Length());
			}
		a.SetSeqName(uSeqIndex, s.GetName());
		a.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
		++uSeqIndex;

		uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);

	delete[] Estring1;
	delete[] Estring2;

	ProgressStepsDone();
	assert(uSeqIndex == uSeqCount);
	}
Пример #20
0
// Choose where to root an unrooted tree.
//
//	tree         Unrooted tree; Quit is called if it is already rooted.
//	ptruNode1,
//	ptruNode2    Outputs, filled in by the selected rooting routine.
//	ptrdLength1,
//	ptrdLength2  Outputs, filled in by the selected rooting routine.
//	RootMethod   ROOT_MidLongestSpan or ROOT_MinAvgLeafDist; any other value
//	             is reported through Quit.
//
// An EdgeInfo table (three entries per node) is filled by calling CalcInfo
// on a worklist of directed edges, then passed to the rooting routine.
void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
  double *ptrdLength1, double *ptrdLength2,
  ROOT RootMethod)
	{
#if	TRACE
	tree.LogMe();
#endif
	if (tree.IsRooted())
		Quit("FindRoot: tree already rooted");

	const unsigned uNodeCount = tree.GetNodeCount();

	if (uNodeCount < 2)
		Quit("Root: don't support trees with < 2 edges");

	EdgeInfo **EIs = new EdgeInfo *[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		EIs[uNodeIndex] = new EdgeInfo[3];

// Seed the worklist with the edge joining each leaf to its only neighbor.
	EdgeList Edges;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		if (tree.IsLeaf(uNodeIndex))
			{
			unsigned uParent = tree.GetNeighbor1(uNodeIndex);
			Edges.Add(uParent, uNodeIndex);
			}

#if	TRACE
	Log("Edges: ");
	Edges.LogMe();
#endif

// Main loop: iterate until all distances known
	for (;;)
		{
		EdgeList NextEdges;

#if	TRACE
		Log("\nTop of main loop\n");
		Log("Edges: ");
		Edges.LogMe();
		Log("MDs:\n");
		ListEIs(EIs, uNodeCount);
#endif

	// For all edges
		const unsigned uEdgeCount = Edges.GetCount();
		if (0 == uEdgeCount)
			break;
		for (unsigned n = 0; n < uEdgeCount; ++n)
			{
			unsigned uNodeFrom;
			unsigned uNodeTo;
			Edges.GetEdge(n, &uNodeFrom, &uNodeTo);

			CalcInfo(tree, uNodeFrom, uNodeTo, EIs);
#if	TRACE
			Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);
#endif
		// Queue each edge neighbor->uNodeFrom that is not yet Known but
		// passes AllKnownOut (presumably: everything it depends on has
		// been computed; confirm against those helpers).
			const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);
			for (unsigned i = 0; i < uNeighborCount; ++i)
				{
				const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);
				if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&
				  AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))
					NextEdges.Add(uNeighborIndex, uNodeFrom);
				}
			}
		Edges.Copy(NextEdges);
		}

#if	TRACE
	ListEIs(EIs, uNodeCount);
#endif

	switch (RootMethod)
		{
	case ROOT_MidLongestSpan:
		RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,
		  ptrdLength1, ptrdLength2);
		break;

	case ROOT_MinAvgLeafDist:
		RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,
		  ptrdLength1, ptrdLength2);
		break;

	default:
		Quit("Invalid RootMethod=%d", RootMethod);
		}

	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		delete[] EIs[uNodeIndex];
	delete[] EIs;
	}
Пример #21
0
static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
  unsigned *ptruNode1, unsigned *ptruNode2,
  double *ptrdLength1, double *ptrdLength2)
	{
	const unsigned uNodeCount = tree.GetNodeCount();

	unsigned uLeaf1 = NULL_NEIGHBOR;
	unsigned uMostDistantLeaf = NULL_NEIGHBOR;
	double dMaxDist = -VERY_LARGE_DOUBLE;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (!tree.IsLeaf(uNodeIndex))
			continue;

		const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);
		if (NULL_NEIGHBOR == uNode2)
			Quit("RootByMidLongestSpan: internal error 0");
		const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);
		const EdgeInfo &EI = EIs[uNodeIndex][0];
		if (!EI.m_bSet)
			Quit("RootByMidLongestSpan: internal error 1");
		if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)
			Quit("RootByMidLongestSpan: internal error 2");
		const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;
		if (dSpanLength > dMaxDist)
			{
			dMaxDist = dSpanLength;
			uLeaf1 = uNodeIndex;
			uMostDistantLeaf = EI.m_uMostDistantLeaf;
			}
		}
	
	if (NULL_NEIGHBOR == uLeaf1)
		Quit("RootByMidLongestSpan: internal error 3");

	const double dTreeHeight = dMaxDist/2.0;
	unsigned uNode1 = uLeaf1;
	unsigned uNode2 = tree.GetNeighbor1(uLeaf1);
	double dAccumSpanLength = 0;

#if	TRACE
	Log("RootByMidLongestSpan: span=%u", uLeaf1);
#endif

	for (;;)
		{
		const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);
#if	TRACE
		Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);
#endif
		if (dAccumSpanLength + dEdgeLength >= dTreeHeight)
			{
			*ptruNode1 = uNode1;
			*ptruNode2 = uNode2;
			*ptrdLength1 = dTreeHeight - dAccumSpanLength;
			*ptrdLength2 = dEdgeLength - *ptrdLength1;
#if	TRACE
			{
			const EdgeInfo &EI = EIs[uLeaf1][0];
			Log("...\n");
			Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",
			  uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);
			}
#endif
			return;
			}

		if (tree.IsLeaf(uNode2))
			Quit("RootByMidLongestSpan: internal error 4");

		dAccumSpanLength += dEdgeLength;
		const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);
		const EdgeInfo &EI = EIs[uNode1][uSub];
		if (!EI.m_bSet)
			Quit("RootByMidLongestSpan: internal error 5");

		uNode1 = uNode2;
		uNode2 = EI.m_uMaxStep;
		}
	}
Пример #22
0
// Rebuild this tree as a pruned copy of `tree` in which every subfamily
// node becomes a leaf.
//
//   tree          Rooted source tree; Quit is called if it is not rooted.
//   Subfams       Node indexes in `tree` that become leaves 0..uSubfamCount-1
//                 of the pruned tree, named "Subfam_1", "Subfam_2", ...
//   uSubfamCount  Number of entries in Subfams; must be at least 2.
//
// The pruned tree has 2*uSubfamCount - 1 nodes. Internal nodes are the
// ancestors of the subfamily nodes and keep the edge lengths of `tree`;
// the source root is mapped to the last pruned index.
void Tree::PruneTree(const Tree &tree, unsigned Subfams[],
                     unsigned uSubfamCount)
{
    if (!tree.IsRooted())
        Quit("Tree::PruneTree: requires rooted tree");

    // 0 would underflow the node count below; 1 can never satisfy the
    // node-count consistency check further down.
    if (uSubfamCount < 2)
        Quit("Tree::PruneTree: requires at least 2 subfamilies");

    Clear();

    m_uNodeCount = 2*uSubfamCount - 1;
    InitCache(m_uNodeCount);

    const unsigned uUnprunedNodeCount = tree.GetNodeCount();

    unsigned *uUnprunedToPrunedIndex = new unsigned[uUnprunedNodeCount];
    unsigned *uPrunedToUnprunedIndex = new unsigned[m_uNodeCount];

    for (unsigned n = 0; n < uUnprunedNodeCount; ++n)
        uUnprunedToPrunedIndex[n] = NULL_NEIGHBOR;

    for (unsigned n = 0; n < m_uNodeCount; ++n)
        uPrunedToUnprunedIndex[n] = NULL_NEIGHBOR;

// Create mapping between unpruned and pruned node indexes
    unsigned uInternalNodeIndex = uSubfamCount;
    for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
    {
        unsigned uUnprunedNodeIndex = Subfams[uSubfamIndex];
        uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uSubfamIndex;
        uPrunedToUnprunedIndex[uSubfamIndex] = uUnprunedNodeIndex;

        // Climb towards the root, numbering each ancestor the first time
        // it is reached.
        for (;;)
        {
            uUnprunedNodeIndex = tree.GetParent(uUnprunedNodeIndex);
            if (tree.IsRoot(uUnprunedNodeIndex))
                break;

            // Already visited this node?
            if (NULL_NEIGHBOR != uUnprunedToPrunedIndex[uUnprunedNodeIndex])
                break;

            // The last pruned index is reserved for the root. Running out
            // of indexes here means Subfams is not consistent with the
            // tree; stop before writing past the end of the mapping.
            if (uInternalNodeIndex >= m_uNodeCount - 1)
                Quit("Tree::PruneTree, Internal error");

            uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uInternalNodeIndex;
            uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedNodeIndex;

            ++uInternalNodeIndex;
        }
    }

    const unsigned uUnprunedRootIndex = tree.GetRootNodeIndex();
    uUnprunedToPrunedIndex[uUnprunedRootIndex] = uInternalNodeIndex;
    uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedRootIndex;

#if	TRACE
    {
        Log("Pruned to unpruned:\n");
        for (unsigned i = 0; i < m_uNodeCount; ++i)
            Log(" [%u]=%u", i, uPrunedToUnprunedIndex[i]);
        Log("\n");
        Log("Unpruned to pruned:\n");
        for (unsigned i = 0; i < uUnprunedNodeCount; ++i)
        {
            unsigned n = uUnprunedToPrunedIndex[i];
            if (n != NULL_NEIGHBOR)
                Log(" [%u]=%u", i, n);
        }
        Log("\n");
    }
#endif

    if (uInternalNodeIndex != m_uNodeCount - 1)
        Quit("Tree::PruneTree, Internal error");

// Nodes 0, 1 ... are the leaves
    for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
    {
        char szName[32];
        sprintf(szName, "Subfam_%u", uSubfamIndex + 1);
        m_ptrName[uSubfamIndex] = strsave(szName);
    }

// Remaining nodes are internal: neighbor 1 is the parent, neighbors 2 and 3
// are the left and right children.
    for (unsigned uPrunedNodeIndex = uSubfamCount; uPrunedNodeIndex < m_uNodeCount;
            ++uPrunedNodeIndex)
    {
        unsigned uUnprunedNodeIndex = uPrunedToUnprunedIndex[uPrunedNodeIndex];

        const unsigned uUnprunedLeft = tree.GetLeft(uUnprunedNodeIndex);
        const unsigned uUnprunedRight = tree.GetRight(uUnprunedNodeIndex);

        const unsigned uPrunedLeft = uUnprunedToPrunedIndex[uUnprunedLeft];
        const unsigned uPrunedRight = uUnprunedToPrunedIndex[uUnprunedRight];

        const double dLeftLength =
            tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedLeft);
        const double dRightLength =
            tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedRight);

        m_uNeighbor2[uPrunedNodeIndex] = uPrunedLeft;
        m_uNeighbor3[uPrunedNodeIndex] = uPrunedRight;

        m_dEdgeLength1[uPrunedLeft] = dLeftLength;
        m_dEdgeLength1[uPrunedRight] = dRightLength;

        m_uNeighbor1[uPrunedLeft] = uPrunedNodeIndex;
        m_uNeighbor1[uPrunedRight] = uPrunedNodeIndex;

        m_bHasEdgeLength1[uPrunedLeft] = true;
        m_bHasEdgeLength1[uPrunedRight] = true;

        m_dEdgeLength2[uPrunedNodeIndex] = dLeftLength;
        m_dEdgeLength3[uPrunedNodeIndex] = dRightLength;

        m_bHasEdgeLength2[uPrunedNodeIndex] = true;
        m_bHasEdgeLength3[uPrunedNodeIndex] = true;
    }

    m_uRootNodeIndex = uUnprunedToPrunedIndex[uUnprunedRootIndex];

    m_bRooted = true;

    Validate();

    delete[] uUnprunedToPrunedIndex;
    delete[] uPrunedToUnprunedIndex;
}
Пример #23
0
static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
  unsigned *ptruNode1, unsigned *ptruNode2,
  double *ptrdLength1, double *ptrdLength2)
	{
	const unsigned uNodeCount = tree.GetNodeCount();
	const unsigned uLeafCount = tree.GetLeafCount();
	unsigned uNode1 = NULL_NEIGHBOR;
	unsigned uNode2 = NULL_NEIGHBOR;
	double dMinHeight = VERY_LARGE_DOUBLE;
	double dBestLength1 = VERY_LARGE_DOUBLE;
	double dBestLength2 = VERY_LARGE_DOUBLE;

	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);
		for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
			{
			const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);

		// Avoid visiting same edge a second time in reversed order.
			if (uNeighborIndex < uNodeIndex)
				continue;

			const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);
			if (NULL_NEIGHBOR == uSubRev)
				Quit("RootByMinAvgLeafDist, internal error 1");

		// Get info for edges Node1->Node2 and Node2->Node1 (reversed)
			const EdgeInfo &EI = EIs[uNodeIndex][uSub];
			const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];

			if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||
			  EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)
				Quit("RootByMinAvgLeafDist, internal error 2");
			if (!EI.m_bSet)
				Quit("RootByMinAvgLeafDist, internal error 3");
			if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)
				Quit("RootByMinAvgLeafDist, internal error 4");

			const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);
			if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))
				Quit("RootByMinAvgLeafDist, internal error 5");

		// Consider point p on edge 12 in tree (1=Node, 2=Neighbor).
		//
        //	-----         ----
        //	     |       |
        //	     1----p--2
        //	     |       |
        //	-----         ----
		//
		// Define:
		//    ADLp = average distance to leaves to left of point p.
		//	  ADRp = average distance to leaves to right of point p.
		//	  L = edge length = distance 12
		//    x = distance 1p
		// So distance p2 = L - x.
		// Average distance from p to leaves on left of p is:
		//		ADLp = ADL1 + x
		// Average distance from p to leaves on right of p is:
		//		ADRp = ADR2 + (L - x)
		// To be a root, we require these two distances to be equal,
		//		ADLp = ADRp
		//		ADL1 + x = ADR2 + (L - x)
		// Solving for x,
		//		x = (ADR2 - ADL1 + L)/2
		// If 0 <= x <= L, we can place the root on edge 12.

			const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;
			const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;

			const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;
			if (x >= 0 && x <= dEdgeLength)
				{
				const double dLength1 = x;
				const double dLength2 = dEdgeLength - x;
				const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1;
				const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2;
				const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;
#if	TRACE
				Log("Candidate root Node1=%u Node2=%u Height=%g\n",
				  uNodeIndex, uNeighborIndex, dHeight);
#endif
				if (dHeight < dMinHeight)
					{
					uNode1 = uNodeIndex;
					uNode2 = uNeighborIndex;
					dBestLength1 = dLength1;
					dBestLength2 = dLength2;
					dMinHeight = dHeight;
					}
				}
			}
		}

	if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)
		Quit("RootByMinAvgLeafDist, internal error 6");

#if	TRACE
	Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",
	  uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);
#endif

	*ptruNode1 = uNode1;
	*ptruNode2 = uNode2;
	*ptrdLength1 = dBestLength1;
	*ptrdLength2 = dBestLength2;
	}