Esempio n. 1
0
/// \brief Load a collection of alignments based on command line parameters and generate a random tree.
///
/// \param args The command line parameters.
/// \param alignments The alignments.
/// \param T The leaf-labelled tree.
/// \param internal_sequences Should each resulting alignment have sequences for internal nodes on the tree?
/// 
void load_As_and_random_T(const variables_map& args,vector<alignment>& alignments,SequenceTree& T,const vector<bool>& internal_sequences)
{
  alignments = load_As(args);

  //------------- Load random tree ------------------------//
  SequenceTree TC = star_tree(sequence_names(alignments[0]));
  if (args.count("t-constraint"))
    TC = load_constraint_tree(args["t-constraint"].as<string>(),sequence_names(alignments[0]));

  T = TC;
  RandomTree(T,1.0);

  //-------------- Link --------------------------------//
  link(alignments,T,internal_sequences);

  //---------------process----------------//
  for(int i=0;i<alignments.size();i++) 
  {
    
    //---------------- Randomize alignment? -----------------//
    if (args.count("randomize-alignment"))
      alignments[i] = randomize(alignments[i],T.n_leaves());
  
    //------------------ Analyze 'internal'------------------//
    if ((args.count("internal") and args["internal"].as<string>() == "+")
	or args.count("randomize-alignment"))
      for(int column=0;column< alignments[i].length();column++) {
	for(int j=T.n_leaves();j<alignments[i].n_sequences();j++) 
	  alignments[i](column,j) = alphabet::not_gap;
      }

    //---- Check that internal sequence satisfy constraints ----//
    check_alignment(alignments[i],T,internal_sequences[i]);
  }
}
Esempio n. 2
0
/// \brief Load a tree and a collection of alignments based on command line parameters.
///
/// \param args The command line parameters.
/// \param alignments The alignments.
/// \param T The leaf-labelled tree.
/// \param internal_sequences Should each resulting alignment have sequences for internal nodes on the tree?
/// 
void load_As_and_T(const variables_map& args,vector<alignment>& alignments,SequenceTree& T,const vector<bool>& internal_sequences)
{
  alignments = load_As(args);

  T = load_T(args);

  link(alignments,T,internal_sequences);

  for(int i=0;i<alignments.size();i++) 
  {
    
    //---------------- Randomize alignment? -----------------//
    if (args.count("randomize-alignment"))
      alignments[i] = randomize(alignments[i],T.n_leaves());
  
    //------------------ Analyze 'internal'------------------//
    if ((args.count("internal") and args["internal"].as<string>() == "+")
	or args.count("randomize-alignment"))
      for(int column=0;column< alignments[i].length();column++) {
	for(int j=T.n_leaves();j<alignments[i].n_sequences();j++) 
	  alignments[i](column,j) = alphabet::not_gap;
      }

    //---- Check that internal sequence satisfy constraints ----//
    check_alignment(alignments[i],T,internal_sequences[i]);
  }
}
Esempio n. 3
0
/// \brief Load an alignment based on command line parameters and generate a random tree.
///
/// \param args The command line parameters.
/// \param alignments The alignments.
/// \param T The leaf-labelled tree.
/// \param internal_sequences Should each resulting alignment have sequences for internal nodes on the tree?
/// 
void load_A_and_random_T(const variables_map& args,alignment& A,SequenceTree& T,bool internal_sequences)
{
  // NO internal sequences, yet!
  A = load_A(args,internal_sequences);

  //------------- Load random tree ------------------------//
  SequenceTree TC = star_tree(sequence_names(A));
  if (args.count("t-constraint"))
    TC = load_constraint_tree(args["t-constraint"].as<string>(),sequence_names(A));

  T = TC;
  RandomTree(T,1.0);

  //------------- Link Alignment and Tree -----------------//
  link(A,T,internal_sequences);

  //---------------- Randomize alignment? -----------------//
  if (args.count("randomize-alignment"))
    A = randomize(A,T.n_leaves());
  
  //------------------ Analyze 'internal'------------------//
  if ((args.count("internal") and args["internal"].as<string>() == "+")
      or args.count("randomize-alignment"))
    for(int column=0;column< A.length();column++) {
      for(int i=T.n_leaves();i<A.n_sequences();i++) 
	A(column,i) = alphabet::not_gap;
    }

  //---- Check that internal sequence satisfy constraints ----//
  check_alignment(A,T,internal_sequences);
}
Esempio n. 4
0
alignment add_internal(alignment A,const SequenceTree& T) 
{
  // Complain if A and T don't correspond
  if (A.n_sequences() != T.n_leaves())
    throw myexception()<<"Number of sequence in alignment doesn't match number of leaves in tree"
		       <<"- can't add internal sequences";

  // Add empty sequences
  vector<sequence> S;
  for(int i=T.n_leaves();i<T.n_nodes();i++) 
  {
    sequence s;

    if (T.label(i) == "")
      throw myexception()<<"Adding internal sequences: Tree has missing internal node name!";

    s.name = T.label(i);

    S.push_back(s);
  }

  A.add_sequences(S);

  return A;
}
Esempio n. 5
0
void peel_n_mutations(const alphabet& a, const vector<int>& letters, const SequenceTree& T,
		      const ublas::matrix<B>& cost,ublas::matrix<B>& n_muts,
		      const vector<const_branchview>& branches)
{
  const int A = a.size();

  assert(letters.size() == T.n_leaves());
  assert(cost.size1() == A);
  assert(cost.size2() == A);

  // we need a scratch row in the matrix
  assert(n_muts.size1() == T.n_nodes());
  assert(n_muts.size2() == A);

  // compute the max cost -- is this approach a good idea?
  // Well... this apparently doesn't work.
  B max_cost = 0;
  for(int i=0;i<A;i++)
    for(int j=0;j<A;j++)
      max_cost = std::max(cost(i,j)+1, max_cost);
    
  // clear the length matrix.
  for(int i=0;i<n_muts.size1();i++)
    for(int j=0;j<n_muts.size2();j++)
      n_muts(i,j)=0;
  
  // set the leaf costs
  for(int s=0;s<T.n_leaves();s++)
  {
    int L = letters[s];

    if (a.is_letter_class(L))
      for(int l=0;l<A;l++)
	if (a.matches(l,L))
	  n_muts(s,l) = 0;
	else
	  n_muts(s,l) = max_cost;
  }


  // compute the costs for letters at each node
  for(int i=0;i<branches.size();i++)
  {
    int s = branches[i].source();
    int t = branches[i].target();

    // for each letter l of node target...
    for(int l=0;l<A;l++)
    {
      // compute minimum treelength for data behind source.
      B temp = n_muts(s,0)+cost(0,l);
      for(int k=1;k<A;k++)
	temp = min(temp, n_muts(s,k)+cost(k,l) );

      // add it to treelengths for data behind target
      n_muts(t,l) += temp;
    }
  }
}
Esempio n. 6
0
/// \brief  Remap the leaf indices of tree \a T to match the alignment \a A: check the result
///
/// \param A The alignment.
/// \param T The tree.
/// \param internal_sequences Should the resulting alignment have sequences for internal nodes on the tree?
///
void link(alignment& A,SequenceTree& T,bool internal_sequences) 
{
  check_names_unique(A);

  // Later, might we WANT sub-branches???
  if (has_sub_branches(T))
    remove_sub_branches(T);

  if (internal_sequences and not is_Cayley(T) and T.n_leaves() > 1) {
    assert(has_polytomy(T));
    throw myexception()<<"Cannot link a multifurcating tree to an alignment with internal sequences.";
  }

  //------ IF sequences < leaf nodes THEN complain ---------//
  if (A.n_sequences() < T.n_leaves())
    throw myexception()<<"Tree has "<<T.n_leaves()<<" leaves but Alignment only has "
		       <<A.n_sequences()<<" sequences.";

  //----- IF sequences = leaf nodes THEN maybe add internal sequences.
  else if (A.n_sequences() == T.n_leaves()) 
  {
    A = remap_A_indices(A,T);

    if (internal_sequences)
    {
      add_internal_labels(T);
      A = add_internal(A,T);
      connect_leaf_characters(A,T);
    }
  }
  //----- IF sequences > leaf nodes THEN maybe complain -------//
  else if (A.n_sequences() > T.n_nodes())
    throw myexception()<<"More alignment sequences ("<<A.n_sequences()<<") than tree nodes ("<<T.n_nodes()<<")!";
  else if (A.n_sequences() < T.n_nodes())
    throw myexception()<<"Fewer alignment sequences ("<<A.n_sequences()<<") than tree nodes ("<<T.n_nodes()<<")!";
  else
  {
    A = remap_A_indices(A,T);
  
    if (not internal_sequences) 
      A = chop_internal(A);
  }
  
  //---------- double-check that we have the right number of sequences ---------//
  if (internal_sequences)
    assert(A.n_sequences() == T.n_nodes());
  else
    assert(A.n_sequences() == T.n_leaves());

  //----- Check that each alignment sequence maps to a corresponding name in the tree -----//
  for(int i=0;i<A.n_sequences();i++)
    assert(T.get_label(i) == A.seq(i).name);

  //---- Check to see that internal nodes satisfy constraints ----//
  check_alignment(A,T,internal_sequences);
}
Esempio n. 7
0
/// \brief  Remap the leaf indices of tree \a T to match the alignment \a A: check the result
///
/// \param A The alignment.
/// \param T The tree.
/// \param internal_sequences Should the resulting alignment have sequences for internal nodes on the tree?
///
void link(alignment& A,SequenceTree& T,bool internal_sequences) 
{
  check_names_unique(A);

  // Later, might we WANT sub-branches???
  if (has_sub_branches(T))
    remove_sub_branches(T);

  if (internal_sequences and not is_Cayley(T)) {
    assert(has_polytomy(T));
    throw myexception()<<"Cannot link a multifurcating tree to an alignment with internal sequences.";
  }

  //------ IF sequences < leaf nodes THEN complain ---------//
  if (A.n_sequences() < T.n_leaves())
    throw myexception()<<"Tree has "<<T.n_leaves()<<" leaves but Alignment only has "
		       <<A.n_sequences()<<" sequences.";

  //----- IF sequences = leaf nodes THEN maybe add internal sequences.
  else if (A.n_sequences() == T.n_leaves()) {
    if (internal_sequences)
      A = add_internal(A,T);
  }
  //----- IF sequences > leaf nodes THEN maybe complain -------//
  else {
    if (not internal_sequences) {
      alignment A2 = chop_internal(A);
      if (A2.n_sequences() == T.n_leaves()) {
	A = A2;
      }
      else
	throw myexception()<<"More alignment sequences than leaf nodes!";
    } 
    else if (A.n_sequences() > T.n_nodes())
      throw myexception()<<"More alignment sequences than tree nodes!";
    else if (A.n_sequences() < T.n_nodes())
      throw myexception()<<"Fewer alignment sequences than tree nodes!";
  }
  
  //---------- double-check that we have the right number of sequences ---------//
  if (internal_sequences)
    assert(A.n_sequences() == T.n_nodes());
  else
    assert(A.n_sequences() == T.n_leaves());

  //----- Remap leaf indices for T onto A's leaf sequence indices -----//
  remap_T_indices(T,A);

  if (internal_sequences)
    connect_leaf_characters(A,T);

  //---- Check to see that internal nodes satisfy constraints ----//
  check_alignment(A,T,internal_sequences);
}
Esempio n. 8
0
void accum_branch_lengths_same_topology::operator()(const SequenceTree& T)
{
  if (not n_samples) {
    if (T.n_leaves() != Q.n_leaves())
      throw myexception()<<"Query tree has "<<Q.n_leaves()<<" leaves, but tree sample has "<<T.n_leaves()<<" leaves.";
  }

  n_samples++;
  if (update_lengths(Q,T,m1,m2,n1))
    n_matches++;
}
Esempio n. 9
0
/// Tree prior: topology & branch lengths (exponential)
efloat_t prior_exponential(const SequenceTree& T,double branch_mean) 
{
  efloat_t p = 1;

  // --------- uniform prior on topologies --------//
  if (T.n_leaves()>3)
    p /= num_topologies(T.n_leaves());

  // ---- Exponential prior on branch lengths ---- //
  for(int i=0;i<T.n_branches();i++) 
    p *= exponential_pdf(T.branch(i).length(), branch_mean);

  return p;
}
Esempio n. 10
0
void count_pair_distances::operator()(const SequenceTree& T)
{
  if (not initialized) {
    N = T.n_leaves();
    names = T.get_leaf_labels();
    m1.resize(N*(N-1)/2);
    m2.resize(N*(N-1)/2);
    m1 = 0;
    m2 = 0;
    initialized = true;
  }

  n_samples++;

  // Theoretically, we could do this much faster, I think.
  //  vector<vector<int> > leaf_sets = partition_sets(T);

  int k=0;
  for(int i=0;i<N;i++)
    for(int j=0;j<i;j++,k++) 
    {
      double D = 0;
      if (RF)
	D = T.edges_distance(i,j);
      else
	D = T.distance(i,j);
      m1[k] += D;
      m2[k] += D*D;
    }
}
Esempio n. 11
0
/// Tree prior: topology & branch lengths (gamma)
efloat_t prior_gamma(const SequenceTree& T,double branch_mean) 
{
  efloat_t p = 1;

  // --------- uniform prior on topologies --------//
  if (T.n_leaves()>3)
    p /= num_topologies(T.n_leaves());

  // ---- Exponential prior on branch lengths ---- //
  double a = 0.5;
  double b = branch_mean*2;

  for(int i=0;i<T.n_branches();i++) 
    p *= gamma_pdf(T.branch(i).length(), a, b);

  return p;
}
Esempio n. 12
0
Matrix getSimilarity(const SequenceTree& T,substitution::MultiModel& SM) {
  int n = T.n_leaves();
  Matrix S(n,n);

  for(int i=0;i<n;i++)
    for(int j=0;j<i;j++)
      S(i,j) = S(j,i) = getSimilarity(T.distance(i,j),SM);

  return S;
}
Esempio n. 13
0
/// \brief Re-index the leaves of tree \a T1 so that the labels have the same ordering as in \a T2.
///
/// \param T1 The leaf-labelled tree to re-index.
/// \param T2 The leaf-labelled tree to match.
///
void remap_T_indices(SequenceTree& T1,const SequenceTree& T2)
{
  if (T1.n_leaves() != T2.n_leaves())
    throw myexception()<<"Trees do not correspond: different numbers of leaves.";

  //----- Remap leaf indices for T onto A's leaf sequence indices -----//
  try {
    remap_T_indices(T1,T2.get_sequences());
  }
  catch(const bad_mapping<string>& b)
  {
    bad_mapping<string> b2(b.missing,b.from);
    if (b.from == 0)
      b2<<"Couldn't find leaf sequence \""<<b2.missing<<"\" in second tree.";
    else
      b2<<"Couldn't find leaf sequence \""<<b2.missing<<"\" in first tree.";
    throw b2;
  }
}
Esempio n. 14
0
Partition partition_from_branch(const SequenceTree& T,int b) 
{
  dynamic_bitset<> group(T.n_leaves());
  const dynamic_bitset<>& with_internal = T.partition(b);

  for(int i=0;i<group.size();i++)
    group[i] = with_internal[i];

  return Partition(T.get_leaf_labels(), group);
}
Esempio n. 15
0
/// \brief Re-index the leaves of tree \a T so that the labels have the same ordering as in \a A.
///
/// \param T The leaf-labelled tree.
/// \param A A multiple sequence alignment.
///
alignment remap_A_indices(alignment& A, const SequenceTree& T)
{
  vector<string> labels = T.get_labels();

  if (A.n_sequences() == T.n_leaves())
  {
    labels.resize(T.n_leaves());

  }
  else if (A.n_sequences() != T.n_nodes())
    throw myexception()<<"Cannot map alignment onto tree:\n  Alignment has "<<A.n_sequences()<<" sequences.\n  Tree has "<<T.n_leaves()<<" leaves and "<<T.n_nodes()<<" nodes.";
      

  for(int i=0;i<labels.size();i++)
    if (labels[i] == "")
    {
      if (i<T.n_leaves())
	throw myexception()<<"Tree has empty label for a leaf node: not allowed!";
      else
	throw myexception()<<"Alignment has internal node information, but tree has empty label for an internal node: not allowed!";
    }

  assert(A.n_sequences() == labels.size());

  //----- Remap leaf indices for T onto A's leaf sequence indices -----//
  try {
    vector<int> mapping = compute_mapping(labels, sequence_names(A));

    return reorder_sequences(A,mapping);
  }
  catch(const bad_mapping<string>& b)
  {
    bad_mapping<string> b2 = b;
    b2.clear();
    if (b.from == 0)
      b2<<"Couldn't find sequence \""<<b2.missing<<"\" in alignment.";
    else
      b2<<"Alignment sequence '"<<b2.missing<<"' not found in the tree.";
    throw b2;
  }
}
Esempio n. 16
0
/// \brief Re-index the leaves of tree \a T so that the labels have the same ordering as in \a A.
///
/// \param T The leaf-labelled tree.
/// \param A A multiple sequence alignment.
///
void remap_T_indices(SequenceTree& T,const alignment& A)
{
  if (A.n_sequences() < T.n_leaves())
    throw myexception()<<"Tree has "<<T.n_leaves()<<" leaves, but alignment has only "<<A.n_sequences()<<" sequences.";

  //----- Remap leaf indices for T onto A's leaf sequence indices -----//
  try {
    vector<string> names = sequence_names(A,T.n_leaves());  

    remap_T_indices(T,names);
  }
  catch(const bad_mapping<string>& b)
  {
    bad_mapping<string> b2(b.missing,b.from);
    if (b.from == 0)
      b2<<"Couldn't find leaf sequence \""<<b2.missing<<"\" in alignment.";
    else
      b2<<"Alignment sequence '"<<b2.missing<<"' not found in the tree.";
    throw b2;
  }
}
Esempio n. 17
0
B n_mutations(const alignment& A, const SequenceTree& T,const ublas::matrix<B>& cost)
{
  const alphabet& a = A.get_alphabet();

  vector<int> letters(T.n_leaves());

  int root = T.directed_branch(0).target();

  vector<const_branchview> branches = branches_toward_node(T,root);

  ublas::matrix<B> n_muts(T.n_nodes(), a.size());

  double tree_length = 0;
  for(int c=0;c<A.length();c++) {
    for(int i=0;i<T.n_leaves();i++)
      letters[i] = A(c,i);
    double length = n_mutations<B>(a,letters,T,cost,n_muts,branches);
    tree_length += length;
  }

  return tree_length;
}
Esempio n. 18
0
/// Reorder internal sequences of \a A to correspond to standardized node names for \a T
alignment standardize(const alignment& A, const SequenceTree& T) 
{
  SequenceTree T2 = T;

  // if we don't have any internal node sequences, then we are already standardized
  if (A.n_sequences() == T.n_leaves())
    return A;

  // standardize NON-LEAF node and branch names in T
  vector<int> mapping = T2.standardize();
  vector<int> new_order = invert(mapping);

  return reorder_sequences(A,new_order);
}
Esempio n. 19
0
/// \brief Re-index the leaves of tree \a T so that the labels have the same ordering as in \a names.
///
/// \param T The leaf-labelled tree.
/// \param names The ordered leaf labels.
///
void remap_T_leaf_indices(SequenceTree& T,const vector<string>& names)
{
  assert(names.size() == T.n_leaves());
  //----- Remap leaf indices for T onto A's leaf sequence indices -----//
  try {
    vector<int> mapping = compute_mapping(T.get_leaf_labels(), names);

    T.standardize(mapping);
  }
  catch(const bad_mapping<string>& b)
  {
    bad_mapping<string> b2 = b;
    b2.clear();
    if (b2.from == 0)
      b2<<"Couldn't find leaf sequence \""<<b2.missing<<"\" in names.";
    else
      b2<<"Sequence '"<<b2.missing<<"' not found in the tree.";
    throw b2;
  }
}
Esempio n. 20
0
// mark nodes in T according to what node of Q they map to
vector<int> get_nodes_map(const SequenceTree& Q,const SequenceTree& T,
			  const vector<int>& branches_map)
{
  assert(branches_map.size() == Q.n_branches() * 2);

  vector<int> nodes_map(T.n_nodes(),-1);

  // map nodes from T -> Q that are in both trees
  for(int b=0;b<Q.n_branches();b++)
  {
    int Q_source = Q.branch(b).source();
    int Q_target = Q.branch(b).target();

    int b2 = branches_map[b];

    int T_source = T.directed_branch(b2).source();
    int T_target = T.directed_branch(b2).target();

    if (nodes_map[T_source] == -1)
      nodes_map[T_source] = Q_source;
    else
      assert(nodes_map[T_source] == Q_source);

    if (nodes_map[T_target] == -1)
      nodes_map[T_target] = Q_target;
    else
      assert(nodes_map[T_target] == Q_target);
  }

  // map the rest of the nodes from T -> Q
  for(int i=Q.n_leaves();i<Q.n_nodes();i++) 
  {
    unsigned D = Q[i].degree();
    if (D <= 3) continue;

    // get a branch of Q pointing into the node
    const_branchview outside = *(Q[i].branches_in());
    // get a branch of T pointing into the node
    outside = T.directed_branch(branches_map[outside.name()]);

    list<const_branchview> branches;
    typedef list<const_branchview>::iterator list_iterator;
    append(outside.branches_after(),branches);
    for(list_iterator b = branches.begin() ; b != branches.end();)
    {
      int node = (*b).target();
      if (nodes_map[node] == -1)
	nodes_map[node] = i;

      if (nodes_map[node] == i) {
	append((*b).branches_after(),branches);
	b++;
      }
      else {
	list_iterator prev = b;
	b++;
	branches.erase(prev);
      }
    }
    assert(branches.size() == D-3);
  }

  for(int i=0;i<nodes_map.size();i++)
    assert(nodes_map[i] != -1);

  return nodes_map;
}
Esempio n. 21
0
int main(int argc,char* argv[])
{ 
  try {
    //---------- Parse command line  -------//
    variables_map args = parse_cmd_line(argc,argv);

    //----------- Load alignment and tree ---------//
    alignment A;
    SequenceTree T;
    if (args.count("tree"))
      load_A_and_T(args,A,T,false);
    else
      A = load_A(args,false);

    const alphabet& a = A.get_alphabet();
    
    //------- Load groups and find branches -------//
    vector<sequence_group> groups;
    if (args.count("groups")) 
      groups = load_groups(A,args["groups"].as<string>());

    for(int i=0;i<groups.size();i++) {
      cerr<<groups[i].name<<": ";
      for(int j=0;j<groups[i].taxa.size();j++)
	cerr<<A.seq(groups[i].taxa[j]).name<<" ";
      cerr<<endl;
    }

    vector<int> group_branches;
    if (args.count("tree"))
    {
      for(int i=0;i<groups.size();i++)
      {
	dynamic_bitset<> p(T.n_leaves());
	for(int j=0;j<groups[i].taxa.size();j++)
	  p[groups[i].taxa[j]] = true;

	int found = -1;
	for(int b=0;b<2*T.n_branches() and found == -1;b++)
	  if (p == branch_partition(T,b))
	    found = b;
	if (found == -1)
	  throw myexception()<<"I can't find group "<<i+1<<" on the tree!";
	
	group_branches.push_back(found);
      }
    }

    vector<string> group_names;
    for(int i=0;i<groups.size();i++)
      group_names.push_back(groups[i].name);

    vector<Partition> splits;
    if (args.count("split")) 
    {
      vector<string> split = args["split"].as<vector<string> >();
      for(int i=0;i<split.size();i++) 
	splits.push_back(Partition(group_names,split[i]));
    }

    //-------------------------------------------//

    Matrix C(A.length(),A.n_sequences()+1);
    for(int i=0;i<C.size1();i++)
      for(int j=0;j<C.size2();j++)
	C(i,j) = 0;

    // yes but, how much more conservation THAN EXPECTED do we see?

    for(int c=0;c<C.size1();c++) 
    {
      vector<bool> interesting(groups.size(), true);

      //-------------------------------------------------------//
      vector<int> leaf_letters( T.n_leaves() );
      for(int j=0;j<leaf_letters.size();j++)
	leaf_letters[j] = A(c,j);
      vector<vector<int> > node_letters = get_all_parsimony_letters(a,leaf_letters,T,unit_cost_matrix(a));

      vector<vector<int> > initial_value(groups.size());
      for(int g=0;g<groups.size();g++) 
      {
	int n = T.directed_branch(group_branches[g]).target();
	initial_value[g] = node_letters[n];
      }

      //------------ find 'group conserved at' values ----------//
      vector<int> value(groups.size(),alphabet::gap);

      for(int g=0;g<groups.size();g++) 
      {
	vector<int> temp;
	for(int i=0;i<groups[g].taxa.size();i++)
	  temp.push_back(A(c,groups[g].taxa[i]));

	int best = most_common(temp);
	int count = number_of(temp,best);
	
	if (count >= groups[g].taxa.size()-1 and count >=3 and count> groups[g].taxa.size()/2)
	  value[g] = best;
      }

      //-------- Determine whether column is interesting --------//
      if (args.count("require-all-different"))
      {
	if (args.count("split"))
	{
	  vector<bool> in_changed_split(groups.size(),false);
	  for(int i=0;i<splits.size();i++) 
	  {
	    bool some_different = false;
	    for(int g1=0;g1<groups.size();g1++)
	      for(int g2=0;g2<groups.size();g2++)
		if (splits[i].group1[g1] and splits[i].group2[g2]) {
		  if (all_different(A,c,groups,g1,g2))
		    some_different = true;
		}
	  
	    bool no_same = true;
	    for(int g1=0;g1<groups.size();g1++)
	      for(int g2=0;g2<groups.size();g2++)
		if (splits[i].group1[g1] and splits[i].group2[g2])
		  if (not all_different(A,c,groups,g1,g2))
		    no_same = false;

	    if (some_different and no_same) 
	      for(int g=0;g<groups.size();g++)
		if (splits[i].group1[g] or splits[i].group2[g])
		  in_changed_split[g] = true;
	  }

	  for(int g=0;g<groups.size();g++)
	    interesting[g] = interesting[g] and in_changed_split[g];
	}

	else {
	  bool different = false;
	  for(int g1=0;g1<groups.size();g1++)
	    for(int g2=0;g2<groups.size();g2++)
	      if (all_different(A,c,groups,g1,g2))
		different = true;

	  if (not different)
	    for(int g=0;g<groups.size();g++) 
	      interesting[g] = false;
	}
      }
    
      if (args.count("require-change"))
      {
	if (args.count("split"))
	{
	  vector<bool> in_changed_split(groups.size(),false);
	  for(int i=0;i<splits.size();i++) 
	  {
	    bool some_different = false;
	    for(int g1=0;g1<groups.size();g1++)
	      for(int g2=0;g2<groups.size();g2++)
		if (splits[i].group1[g1] and splits[i].group2[g2]) {
		  if (value[g1] != value[g2])
		    if (not args.count("ignore-rate-change") or 
			(value[g1] != alphabet::gap and value[g2] != alphabet::gap))
		      some_different = true;
		}
	  
	    bool no_same = true;
	    for(int g1=0;g1<groups.size();g1++)
	      for(int g2=0;g2<groups.size();g2++)
		if (splits[i].group1[g1] and splits[i].group2[g2])
		  if (value[g1] == value[g2])
		    no_same = false;

	    // This is Option #1 
	    //  - some conserved differences but no conserved similarities
	    // Also consider Option #2
	    //  - a change in both LETTER and CONSERVATION on at least one of the
	    //    two branches leading from the duplication.
	    if (some_different and no_same) 
	      for(int g=0;g<groups.size();g++)
		if (splits[i].group1[g] or splits[i].group2[g])
		  in_changed_split[g] = true;
	  }

	  for(int g=0;g<groups.size();g++)
	    interesting[g] = interesting[g] and in_changed_split[g];


	}

	else {
	  if (args.count("ignore-rate-change")) 
	  {
	    for(int g=0;g<groups.size();g++) 
	      interesting[g] = interesting[g] and not all_same_or(value,alphabet::gap);
	  }
	  else 
	  {
	    for(int g=0;g<groups.size();g++)
		interesting[g] = interesting[g] and not all_same(value);
	  }
	}
      }
    
      // A group is only interesting if its conserved
      if (args.count("require-conservation"))
	for(int g=0;g<groups.size();g++)
	  interesting[g] = interesting[g] and (value[g] != alphabet::gap);

      // A group is only interesting if it is in one of the splits
      if (args.count("split"))
	for(int g=0;g<groups.size();g++) {
	  bool found = false;
	  for(int i=0;i<splits.size() and not found;i++) 
	    if (splits[i].group1[g] or splits[i].group2[g])
	      found = true;
	  interesting[g] = interesting[g] and found;
	}

      //------------ print 'group conserved at' values ----------//
      cerr<<c+1<<"   ";
      for(int i=0;i<value.size();i++) 
	cerr<<a.lookup(value[i])<<" ";
      cerr<<"     ";

      for(int g=0;g<groups.size();g++) 
	if (interesting[g])
	  cerr<<"1 ";
	else
	  cerr<<"0 ";
      cerr<<endl;

      //------------- print parsimony initial values --------------//
      cerr<<"     ";
      for(int i=0;i<initial_value.size();i++) {
	for(int j=0;j<initial_value[i].size();j++)
	  cerr<<a.lookup(initial_value[i][j]);
	cerr<<" "; 
      }     
      cerr<<"    "<<n_mutations(a,leaf_letters,T,unit_cost_matrix(a))<<endl;
      cerr<<endl;



      //--------------------- Set highlighting ---------------------//
      // Interesting groups -> 1.0
      for(int g=0;g<groups.size();g++)
	if (interesting[g])
	  for(int i=0;i<groups[g].taxa.size();i++)
	    C(c,groups[g].taxa[i]) = 1.0;

      // Set conserved groups -> 0.5
      for(int g=0;g<groups.size();g++)
	if (value[g] != alphabet::gap)
	  for(int i=0;i<groups[g].taxa.size();i++)
	    C(c,groups[g].taxa[i]) = std::max(0.5, C(c,groups[g].taxa[i]));
    }


    cout<<join(sequence_names(A),' ')<<endl;
    for(int i=0;i<C.size1();i++) {
      vector<double> temp;
      for(int j=0;j<C.size2();j++)
	temp.push_back(C(i,j));
      cout<<join(temp,' ')<<endl;
    }
    
  }
  catch (std::exception& e) {
    std::cerr<<"alignment-find-conserved: Error! "<<e.what()<<endl;
    exit(1);
  }

  return 0;
}