コード例 #1
0
ファイル: estring.cpp プロジェクト: Unode/ext_apps
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
	{
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert(sIn.Length() == uSymbols);

	unsigned uColCount = uSymbols + uIndels;

	a.Clear();
	a.SetSize(1, uColCount);

	a.SetSeqName(0, sIn.GetName());
	a.SetSeqId(0, sIn.GetId());

	unsigned p = 0;
	unsigned uColIndex = 0;
	for (;;)
		{
		int n = *es++;
		if (0 == n)
			break;
		if (n > 0)
			for (int i = 0; i < n; ++i)
				{
				char c = sIn[p++];
				a.SetChar(0, uColIndex++, c);
				}
		else
			for (int i = 0; i < -n; ++i)
				a.SetChar(0, uColIndex++, '-');
		}
	assert(uColIndex == uColCount);
	return uColCount;
	}
コード例 #2
0
ファイル: seqvect.cpp プロジェクト: cran/muscle
void SeqVect::PadToMSA(MSA &msa)
	{
	unsigned uSeqCount = Length();
	if (0 == uSeqCount)
		{
		msa.Clear();
		return;
		}

	unsigned uLongestSeqLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq *ptrSeq = at(uSeqIndex);
		unsigned uColCount = ptrSeq->Length();
		if (uColCount > uLongestSeqLength)
			uLongestSeqLength = uColCount;
		}
	msa.SetSize(uSeqCount, uLongestSeqLength);
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq *ptrSeq = at(uSeqIndex);
		msa.SetSeqName(uSeqIndex, ptrSeq->GetName());
		unsigned uColCount = ptrSeq->Length();
		unsigned uColIndex;
		for (uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			char c = ptrSeq->at(uColIndex);
			msa.SetChar(uSeqIndex, uColIndex, c);
			}
		while (uColIndex < uLongestSeqLength)
			msa.SetChar(uSeqIndex, uColIndex++, '.');
		}
	}
コード例 #3
0
ファイル: muscleout.cpp プロジェクト: Unode/ext_apps
void MuscleOutput(MSA &msa)
	{
	MHackEnd(msa);
	if (g_bStable)
		{
		MSA msaStable;
		Stabilize(msa, msaStable);
		msa.Clear();	// save memory
		DoOutput(msaStable);
		}
	else
		DoOutput(msa);
	}
コード例 #4
0
ファイル: seq.cpp プロジェクト: bigmuscle/bigmuscle
void Seq::ExtractUngapped(MSA &msa) const
	{
	msa.Clear();
	unsigned uColCount = Length();
	msa.SetSize(1, 1);
	unsigned uUngappedPos = 0;
	for (unsigned n = 0; n < uColCount; ++n)
		{
		char c = at(n);
		if (!IsGapChar(c))
			msa.SetChar(0, uUngappedPos++, c);
		}
	msa.SetSeqName(0, m_ptrName);
	}
コード例 #5
0
ファイル: MuscleUtils.cpp プロジェクト: ggrekhov/ugene
void prepareAlignResults(MSA& msa, const DNAAlphabet* al, MultipleSequenceAlignment& ma, bool mhack) {
    if (mhack) {
        MHackEnd(msa);
    }
    MuscleContext* ctx = getMuscleContext();
    if (ctx->params.g_bStable) {
        MSA msaStable;
        Stabilize(msa, msaStable);
        msa.Clear();
        convertMSA2MAlignment(msaStable, al, ma);
    } else {
        convertMSA2MAlignment(msa, al, ma);
    }
}
コード例 #6
0
ファイル: subfam.cpp プロジェクト: Wyss/mauve-py
void ProgAlignSubFams()
	{
	MSA msaOut;

	SetOutputFileName(g_pstrOutFileName.get());
	SetInputFileName(g_pstrInFileName.get());

	SetMaxIters(g_uMaxIters.get());
	SetSeqWeightMethod(g_SeqWeight1.get());

	TextFile fileIn(g_pstrInFileName.get());
	SeqVect v;
	v.FromFASTAFile(fileIn);
	const unsigned uSeqCount = v.Length();

	if (0 == uSeqCount)
		Quit("No sequences in input file");

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType.get())
		{
	case SEQTYPE_Auto:
		Alpha = v.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid seq type");
		}
	SetAlpha(Alpha);
	v.FixAlpha();

	PTR_SCOREMATRIX UserMatrix = 0;
	if (0 != g_pstrMatrixFileName.get())
		{
		const char *FileName = g_pstrMatrixFileName.get();
		const char *Path = getenv("MUSCLE_MXPATH");
		if (Path != 0)
			{
			size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
			char *NewFileName = new char[n];
			sprintf(NewFileName, "%s/%s", Path, FileName);
			FileName = NewFileName;
			}
		TextFile File(FileName);
		UserMatrix = ReadMx(File);
		g_Alpha = ALPHA_Amino;
		g_PPScore = PPSCORE_SP;
		}

	SetPPScore();

	if (0 != UserMatrix)
		g_ptrScoreMatrix = UserMatrix;

	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
		{
		SetPPScore(PPSCORE_SPN);
		g_Distance1.get() = DISTANCE_Kmer4_6;
		}

	unsigned uMaxL = 0;
	unsigned uTotL = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned L = v.GetSeq(uSeqIndex).Length();
		uTotL += L;
		if (L > uMaxL)
			uMaxL = L;
		}

	SetIter(1);
	g_bDiags.get() = g_bDiags1.get();
	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

	SetMuscleSeqVect(v);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		v.SetSeqId(uSeqIndex, uSeqIndex);

	if (uSeqCount > 1)
		MHackStart(v);

	if (0 == uSeqCount)
		{
		msaOut.Clear();
		return;
		}

	if (1 == uSeqCount && ALPHA_Amino == Alpha)
		{
		const Seq &s = v.GetSeq(0);
		msaOut.FromSeq(s);
		return;
		}

	Tree GuideTree;
	TreeFromSeqVect(v, GuideTree, g_Cluster1.get(), g_Distance1.get(), g_Root1.get());
	SetMuscleTree(GuideTree);

	MSA msa;
	if (g_bLow.get())
		{
		ProgNode *ProgNodes = 0;
		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
		delete[] ProgNodes;
		}
	else
		ProgressiveAlign(v, GuideTree, msa);
	SetCurrentAlignment(msa);
	TreeFromMSA(msa, GuideTree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get());
	SetMuscleTree(GuideTree);

	unsigned *SubFams = new unsigned[uSeqCount];
	unsigned uSubFamCount;
	SubFam(GuideTree, g_uMaxSubFamCount.get(), SubFams, &uSubFamCount);

	SetProgressDesc("Align node");
	const unsigned uNodeCount = 2*uSeqCount - 1;

	ProgNode *ProgNodes = new ProgNode[uNodeCount];
	bool *NodeIsSubFam = new bool[uNodeCount];
	bool *NodeInSubFam = new bool[uNodeCount];

	for (unsigned i = 0; i < uNodeCount; ++i)
		{
		NodeIsSubFam[i] = false;
		NodeInSubFam[i] = false;
		}

	for (unsigned i = 0; i < uSubFamCount; ++i)
		{
		unsigned uNodeIndex = SubFams[i];
		assert(uNodeIndex < uNodeCount);
		NodeIsSubFam[uNodeIndex] = true;
		SetInFam(GuideTree, uNodeIndex, NodeInSubFam);
		}

	unsigned uJoin = 0;
	unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
	do
		{
		if (NodeIsSubFam[uTreeNodeIndex])
			{
#if	TRACE
			Log("Node %d: align subfam\n", uTreeNodeIndex);
#endif
			ProgNode &Node = ProgNodes[uTreeNodeIndex];
			AlignSubFam(v, GuideTree, uTreeNodeIndex, Node.m_MSA);
			Node.m_uLength = Node.m_MSA.GetColCount();
			}
		else if (!NodeInSubFam[uTreeNodeIndex])
			{
#if	TRACE
			Log("Node %d: align two subfams\n", uTreeNodeIndex);
#endif
			Progress(uJoin, uSubFamCount - 1);
			++uJoin;

			const unsigned uMergeNodeIndex = uTreeNodeIndex;
			ProgNode &Parent = ProgNodes[uMergeNodeIndex];

			const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
			const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);

			ProgNode &Node1 = ProgNodes[uLeft];
			ProgNode &Node2 = ProgNodes[uRight];

			PWPath Path;
			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
			Parent.m_uLength = Parent.m_MSA.GetColCount();

			Node1.m_MSA.Clear();
			Node2.m_MSA.Clear();
			}
		else
			{
#if	TRACE
			Log("Node %d: in subfam\n", uTreeNodeIndex);
#endif
			;
			}
		uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);
	ProgressStepsDone();

	unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];

	TextFile fOut(g_pstrOutFileName.get(), true);
	MHackEnd(RootProgNode.m_MSA);
	RootProgNode.m_MSA.ToFile(fOut);

	delete[] NodeInSubFam;
	delete[] NodeIsSubFam;
	delete[] ProgNodes;
	delete[] SubFams;

	ProgNodes = 0;
	NodeInSubFam = 0;
	NodeIsSubFam = 0;
	SubFams = 0;
	}
コード例 #7
0
ファイル: aligngivenpathsw.cpp プロジェクト: Wyss/mauve-py
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
  MSA &msaCombined)
	{
	msaCombined.Clear();

#if	TRACE
	Log("AlignTwoMSAsGivenPathSW\n");
	Log("Template A:\n");
	msaA.LogMe();
	Log("Template B:\n");
	msaB.LogMe();
#endif

	const unsigned uColCountA = msaA.GetColCount();
	const unsigned uColCountB = msaB.GetColCount();

	const unsigned uSeqCountA = msaA.GetSeqCount();
	const unsigned uSeqCountB = msaB.GetSeqCount();

	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);

// Copy sequence names into combined MSA
	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		{
		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
		}

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		{
		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
		}

	unsigned uColIndexA = 0;
	unsigned uColIndexB = 0;
	unsigned uColIndexCombined = 0;
	const unsigned uEdgeCount = Path.GetEdgeCount();
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
#if	TRACE
		Log("\nEdge %u %c%u.%u\n",
		  uEdgeIndex,
		  Edge.cType,
		  Edge.uPrefixLengthA,
		  Edge.uPrefixLengthB);
#endif
		const char cType = Edge.cType;
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
		unsigned uColCountA = 0;
		if (uPrefixLengthA > 0)
			{
			const unsigned uNodeIndexA = uPrefixLengthA - 1;
			const unsigned uTplColIndexA = uNodeIndexA;
			if (uTplColIndexA > uColIndexA)
				uColCountA = uTplColIndexA - uColIndexA;
			}

		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
		unsigned uColCountB = 0;
		if (uPrefixLengthB > 0)
			{
			const unsigned uNodeIndexB = uPrefixLengthB - 1;
			const unsigned uTplColIndexB = uNodeIndexB;
			if (uTplColIndexB > uColIndexB)
				uColCountB = uTplColIndexB - uColIndexB;
			}

		AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

		switch (cType)
			{
		case 'M':
			{
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexA == uColA);
			assert(uColIndexB == uColB);
			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
			  msaCombined, uColIndexCombined);
			break;
			}
		case 'D':
			{
			assert(uPrefixLengthA > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			assert(uColIndexA == uColA);
			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		case 'I':
			{
			assert(uPrefixLengthB > 0);
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexB == uColB);
			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		default:
			assert(false);
			}
		}
	unsigned uInsertColCountA = uColCountA - uColIndexA;
	unsigned uInsertColCountB = uColCountB - uColIndexB;

	AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
	}