void DoMakeTree() { if (g_pstrInFileName.get() == 0 || g_pstrOutFileName.get() == 0) Quit("-maketree requires -in <msa> and -out <treefile>"); SetStartTime(); SetSeqWeightMethod(g_SeqWeight1.get()); TextFile MSAFile(g_pstrInFileName.get()); MSA msa; msa.FromFile(MSAFile); unsigned uSeqCount = msa.GetSeqCount(); MSA::SetIdCount(uSeqCount); // Initialize sequence ids. // From this point on, ids must somehow propogate from here. for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) msa.SetSeqId(uSeqIndex, uSeqIndex); SetMuscleInputMSA(msa); Progress("%u sequences", uSeqCount); Tree tree; TreeFromMSA(msa, tree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get()); TextFile TreeFile(g_pstrOutFileName.get(), true); tree.ToFile(TreeFile); Progress("Tree created"); }
void DoSP() { MuscleContext *ctx = getMuscleContext(); TextFile f(ctx->params.g_pstrSPFileName); MSA a; a.FromFile(f); ALPHA Alpha = ALPHA_Undefined; switch (ctx->params.g_SeqType) { case SEQTYPE_Auto: Alpha = a.GuessAlpha(); break; case SEQTYPE_Protein: Alpha = ALPHA_Amino; break; case SEQTYPE_DNA: Alpha = ALPHA_DNA; break; case SEQTYPE_RNA: Alpha = ALPHA_RNA; break; default: Quit("Invalid SeqType"); } SetAlpha(Alpha); a.FixAlpha(); SetPPScore(); const unsigned uSeqCount = a.GetSeqCount(); if (0 == uSeqCount) Quit("No sequences in input file %s", ctx->params.g_pstrSPFileName); MSA::SetIdCount(uSeqCount); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) a.SetSeqId(uSeqIndex, uSeqIndex); SetSeqWeightMethod(ctx->params.g_SeqWeight1); Tree tree; TreeFromMSA(a, tree, ctx->params.g_Cluster2, ctx->params.g_Distance2, ctx->params.g_Root2); SetMuscleTree(tree); SetMSAWeightsMuscle((MSA &) a); SCORE SP = ObjScoreSP(a); Log("File=%s;SP=%.4g\n", ctx->params.g_pstrSPFileName, SP); fprintf(stderr, "File=%s;SP=%.4g\n", ctx->params.g_pstrSPFileName, SP); }
void AlignSubFam(SeqVect &vAll, const Tree &GuideTree, unsigned uNodeIndex, MSA &msaOut) { const unsigned uSeqCount = vAll.GetSeqCount(); const char *InTmp = "asf_in.tmp"; const char *OutTmp = "asf_out.tmp"; unsigned *Leaves = new unsigned[uSeqCount]; unsigned uLeafCount; GetLeaves(GuideTree, uNodeIndex, Leaves, &uLeafCount); SeqVect v; for (unsigned i = 0; i < uLeafCount; ++i) { unsigned uLeafNodeIndex = Leaves[i]; unsigned uId = GuideTree.GetLeafId(uLeafNodeIndex); Seq &s = vAll.GetSeqById(uId); v.AppendSeq(s); } #if TRACE { Log("Align subfam[node=%d, size=%d] ", uNodeIndex, uLeafCount); for (unsigned i = 0; i < uLeafCount; ++i) Log(" %s", v.GetSeqName(i)); Log("\n"); } #endif TextFile fIn(InTmp, true); v.ToFASTAFile(fIn); fIn.Close(); char CmdLine[4096]; sprintf(CmdLine, "probcons %s > %s 2> /dev/null", InTmp, OutTmp); // sprintf(CmdLine, "muscle -in %s -out %s -maxiters 1", InTmp, OutTmp); system(CmdLine); TextFile fOut(OutTmp); msaOut.FromFile(fOut); for (unsigned uSeqIndex = 0; uSeqIndex < uLeafCount; ++uSeqIndex) { const char *Name = msaOut.GetSeqName(uSeqIndex); unsigned uId = vAll.GetSeqIdFromName(Name); msaOut.SetSeqId(uSeqIndex, uId); } unlink(InTmp); unlink(OutTmp); delete[] Leaves; }
void Refine() { SetOutputFileName(g_pstrOutFileName.get()); SetInputFileName(g_pstrInFileName.get()); SetStartTime(); SetMaxIters(g_uMaxIters.get()); SetSeqWeightMethod(g_SeqWeight1.get()); TextFile fileIn(g_pstrInFileName.get()); MSA msa; msa.FromFile(fileIn); const unsigned uSeqCount = msa.GetSeqCount(); if (0 == uSeqCount) Quit("No sequences in input file"); ALPHA Alpha = ALPHA_Undefined; switch (g_SeqType.get()) { case SEQTYPE_Auto: Alpha = msa.GuessAlpha(); break; case SEQTYPE_Protein: Alpha = ALPHA_Amino; break; case SEQTYPE_DNA: Alpha = ALPHA_DNA; break; case SEQTYPE_RNA: Alpha = ALPHA_RNA; break; default: Quit("Invalid SeqType"); } SetAlpha(Alpha); msa.FixAlpha(); SetPPScore(); if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha) SetPPScore(PPSCORE_SPN); MSA::SetIdCount(uSeqCount); // Initialize sequence ids. // From this point on, ids must somehow propogate from here. for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) msa.SetSeqId(uSeqIndex, uSeqIndex); SetMuscleInputMSA(msa); Tree GuideTree; TreeFromMSA(msa, GuideTree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get()); SetMuscleTree(GuideTree); if (g_bAnchors.get()) RefineVert(msa, GuideTree, g_uMaxIters.get()); else RefineHoriz(msa, GuideTree, g_uMaxIters.get(), false, false); ValidateMuscleIds(msa); ValidateMuscleIds(GuideTree); // TextFile fileOut(g_pstrOutFileName.get(), true); // msa.ToFile(fileOut); MuscleOutput(msa); }
void DoRefineW() { SetOutputFileName(g_pstrOutFileName); SetInputFileName(g_pstrInFileName); SetStartTime(); SetMaxIters(g_uMaxIters); SetSeqWeightMethod(g_SeqWeight1); TextFile fileIn(g_pstrInFileName); MSA msa; msa.FromFile(fileIn); const unsigned uSeqCount = msa.GetSeqCount(); if (0 == uSeqCount) Quit("No sequences in input file"); MSA::SetIdCount(uSeqCount); // Initialize sequence ids. // From this point on, ids must somehow propogate from here. for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) msa.SetSeqId(uSeqIndex, uSeqIndex); SetMuscleInputMSA(msa); ALPHA Alpha = ALPHA_Undefined; switch (g_SeqType) { case SEQTYPE_Auto: Alpha = msa.GuessAlpha(); break; case SEQTYPE_Protein: Alpha = ALPHA_Amino; break; case SEQTYPE_DNA: Alpha = ALPHA_DNA; break; case SEQTYPE_RNA: Alpha = ALPHA_RNA; break; default: Quit("Invalid SeqType"); } SetAlpha(Alpha); msa.FixAlpha(); if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha) SetPPScore(PPSCORE_SPN); MSA msaOut; RefineW(msa, msaOut); // ValidateMuscleIds(msa); // TextFile fileOut(g_pstrOutFileName, true); // msaOut.ToFile(fileOut); MuscleOutput(msaOut); }