/// \brief Load a tree and a collection of alignments based on command line parameters. /// /// \param args The command line parameters. /// \param alignments The alignments. /// \param T The leaf-labelled tree. /// \param internal_sequences Should each resulting alignment have sequences for internal nodes on the tree? /// void load_As_and_T(const variables_map& args,vector<alignment>& alignments,RootedSequenceTree& T,const vector<bool>& internal_sequences) { alignments = load_As(args); T = load_T(args); link(alignments,T,internal_sequences); for(int i=0;i<alignments.size();i++) { //---------------- Randomize alignment? -----------------// if (args.count("randomize-alignment")) alignments[i] = randomize(alignments[i],T.n_leaves()); //------------------ Analyze 'internal'------------------// if ((args.count("internal") and args["internal"].as<string>() == "+") or args.count("randomize-alignment")) for(int column=0;column< alignments[i].length();column++) { for(int j=T.n_leaves();j<alignments[i].n_sequences();j++) alignments[i](column,j) = alphabet::not_gap; } //---- Check that internal sequence satisfy constraints ----// check_alignment(alignments[i],T,internal_sequences[i]); } }
/// \brief Load a tree and an alignment based on command line parameters. /// /// \param args The command line parameters. /// \param alignments The alignments. /// \param T The leaf-labelled tree. /// \param internal_sequences Should each resulting alignment have sequences for internal nodes on the tree? /// void load_A_and_T(const variables_map& args,alignment& A,RootedSequenceTree& T,bool internal_sequences) { A = load_A(args,internal_sequences); T = load_T(args); //------------- Link Alignment and Tree -----------------// link(A,T,internal_sequences); //---------------- Randomize alignment? -----------------// if (args.count("randomize-alignment")) A = randomize(A,T.n_leaves()); else if (args.count("unalign-all")) A = unalign_all(A,T.n_leaves()); //------------------ Analyze 'internal'------------------// if ((args.count("internal") and args["internal"].as<string>() == "+") or args.count("randomize-alignment")) for(int column=0;column< A.length();column++) { for(int i=T.n_leaves();i<A.n_sequences();i++) A.set_value(column,i, alphabet::not_gap ); } //---- Check that internal sequence satisfy constraints ----// check_alignment(A,T,internal_sequences); }
/// Load a tree from command line args "--tree filename" RootedSequenceTree load_T(const variables_map& args) { if (not args.count("tree")) throw myexception()<<"Tree file not specified! (--tree <filename>)"; RootedSequenceTree RT; RT.read(args["tree"].as<string>()); return RT; }
/// \brief Build a tree from two rooted sequence trees.
///
/// The topology is produced by the RootedTree(T1,T2) base constructor; the
/// label list of the new tree is every label of \a T1 (in T1's order)
/// followed by every label of \a T2 (in T2's order).
///
RootedSequenceTree::RootedSequenceTree(const RootedSequenceTree& T1, const RootedSequenceTree& T2)
  :RootedTree(T1,T2)
{
  // First the labels contributed by T1 ...
  int k = 0;
  while (k < T1.get_sequences().size())
  {
    sequences.push_back(T1.seq(k));
    ++k;
  }

  // ... then the labels contributed by T2.
  k = 0;
  while (k < T2.get_sequences().size())
  {
    sequences.push_back(T2.seq(k));
    ++k;
  }
}
/// Construct a multifurcating tree representing topology constraints from file \a filename. /// /// \param filename The name of the file to load the tree from. /// \param names The order of the leaf labels. /// \return a multifurcating tree. /// SequenceTree load_constraint_tree(const string& filename,const vector<string>& names) { RootedSequenceTree RT; RT.read(filename); SequenceTree constraint = RT; remove_sub_branches(constraint); try{ remap_T_indices(constraint,names); } catch(const bad_mapping<string>& b) { bad_mapping<string> b2(b.missing,b.from); if (b.from == 0) b2<<"Constraint tree leaf sequence '"<<b2.missing<<"' not found in the alignment."; else b2<<"Alignment sequence '"<<b2.missing<<"' not found in the constraint tree."; throw b2; } return constraint; }
/// \brief Parse one tree from each line of text.
///
/// Parsing stops at the first line that fails to parse: the error is
/// reported on cerr and the trees parsed so far are returned.
///
/// \param lines The tree descriptions, one per entry.
/// \return The trees that were parsed successfully before any failure.
///
/// Throws myexception if \a lines is empty.
///
vector<SequenceTree> load_trees(const vector<string>& lines)
{
  if (lines.empty())
    throw myexception()<<"No trees were read in!";

  vector<SequenceTree> trees;

  for(vector<string>::const_iterator line = lines.begin(); line != lines.end(); ++line)
  {
    RootedSequenceTree T;

    try
    {
      T.parse(*line);
    }
    catch (std::exception& e)
    {
      // A bad line ends the read; earlier trees are kept.
      cerr<<"Exception: "<<e.what()<<endl;
      cerr<<" Quitting read of tree file"<<endl;
      break;
    }

    trees.push_back(T);
  }

  return trees;
}
/// \brief Remap the leaf indices of tree \a T to match the alignment \a A: check the result /// /// \param A The alignment. /// \param T The tree. /// \param internal_sequences Should the resulting alignment have sequences for internal nodes on the tree? /// void link(alignment& A,RootedSequenceTree& T,bool internal_sequences) { check_names_unique(A); // Later, might we WANT sub-branches??? if (has_sub_branches(T)) remove_sub_branches(T); if (internal_sequences and not is_Cayley(T)) { assert(has_polytomy(T)); throw myexception()<<"Cannot link a multifurcating tree to an alignment with internal sequences."; } //------ IF sequences < leaf nodes THEN complain ---------// if (A.n_sequences() < T.n_leaves()) throw myexception()<<"Tree has "<<T.n_leaves()<<" leaves but Alignment only has " <<A.n_sequences()<<" sequences."; //----- IF sequences = leaf nodes THEN maybe add internal sequences. else if (A.n_sequences() == T.n_leaves()) { if (internal_sequences) A = add_internal(A,T); } //----- IF sequences > leaf nodes THEN maybe complain -------// else { if (not internal_sequences) throw myexception()<<"More alignment sequences than leaf nodes!"; if (A.n_sequences() > T.n_nodes()) throw myexception()<<"More alignment sequences than tree nodes!"; else if (A.n_sequences() < T.n_nodes()) throw myexception()<<"Fewer alignment sequences than tree nodes!"; } //---------- double-check that we have the right number of sequences ---------// if (internal_sequences) assert(A.n_sequences() == T.n_nodes()); else assert(A.n_sequences() == T.n_leaves()); //----- Remap leaf indices for T onto A's leaf sequence indices -----// remap_T_indices(T,A); if (internal_sequences) connect_leaf_characters(A,T); //---- Check to see that internal nodes satisfy constraints ----// check_alignment(A,T,internal_sequences); }
int main(int argc,char* argv[]) { try { //---------- Parse command line -------// variables_map args = parse_cmd_line(argc,argv); RootedSequenceTree T = load_T(args); int root=-1; if (args.count("outgroup")) { string outgroup = args["outgroup"].as<string>(); int leaf = find_leaf(T,outgroup); root = split_branch(T,leaf); } else if (args.count("taxa")) { string taxa = args["taxa"].as<string>(); vector<string> taxon = split(taxa,','); if (taxon.size() != 3) throw myexception()<<"You must supply exactly 3 taxa, but you supplied "<<taxon.size(); int n1 = find_leaf(T,taxon[0]); int n2 = find_leaf(T,taxon[1]); int n3 = find_leaf(T,taxon[2]); T.reroot(n1); root = T.common_ancestor(n2,n3); } else if (args.count("leaf")) { string leaf_name = args["leaf"].as<string>(); root = find_leaf(T,leaf_name); } else if (args.count("parent-of")) { string leaf_name = args["parent-of"].as<string>(); int leaf = find_leaf(T,leaf_name); root = T.branch(leaf).target(); } else if (args.count("branch")) { string p = args["branch"].as<string>(); vector<string> taxa = split(p,' '); dynamic_bitset<> mask(T.n_leaves()); dynamic_bitset<> group1(T.n_leaves()); int separator = find_index(taxa,string("|")); if (separator == -1) throw myexception()<<"Partition is missing a separator"; for(int i=0;i<separator;i++) { int ii = find_leaf(T,taxa[i]); mask[ii] = true; group1[ii] = true; } for(int i=separator+1;i<taxa.size();i++) { int ii = find_leaf(T,taxa[i]); mask[ii] = true; } Partition P(T.get_sequences(),group1,mask); cerr<<P<<endl; int b = which_partition(T,P); cerr<<partition_from_branch(T,b)<<endl; root = split_branch(T,b); } else throw myexception("neither --outgroup nor --taxa nor --leaf specified!"); T.reroot(root); std::cout<<T<<endl; } catch (std::exception& e) { std::cerr<<"tree-reroot: Error! "<<e.what()<<endl; exit(1); } return 0; }
/// \brief Read the next tree into \a T, labelling it with this reader's leaf names.
///
/// The label list of \a T is overwritten with leaf_names, and the work is then
/// delegated to the next_tree(RootedTree&) overload.
///
/// \param T Receives the labels and the next tree.
/// \return Whatever the next_tree(RootedTree&) overload returns.
///
bool reader_t::next_tree(RootedSequenceTree& T)
{
  // View T through its base class so that the RootedTree overload is chosen.
  RootedTree& base = T;

  T.get_sequences() = leaf_names;

  return next_tree(base);
}